Author: jing9 Date: Sat Feb 15 00:28:21 2014 New Revision: 1568563 URL: http://svn.apache.org/r1568563 Log: HDFS-5920. Support rollback of rolling upgrade in NameNode and JournalNodes. Contributed by Jing Zhao.
Added: hadoop/common/branches/HDFS-5535/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestRollingUpgradeRollback.java Modified: hadoop/common/branches/HDFS-5535/hadoop-hdfs-project/hadoop-hdfs/CHANGES_HDFS-5535.txt hadoop/common/branches/HDFS-5535/hadoop-hdfs-project/hadoop-hdfs/src/contrib/bkjournal/src/main/java/org/apache/hadoop/contrib/bkjournal/BookKeeperJournalManager.java hadoop/common/branches/HDFS-5535/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/client/AsyncLogger.java hadoop/common/branches/HDFS-5535/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/client/AsyncLoggerSet.java hadoop/common/branches/HDFS-5535/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/client/IPCLoggerChannel.java hadoop/common/branches/HDFS-5535/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/client/QuorumJournalManager.java hadoop/common/branches/HDFS-5535/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/protocol/QJournalProtocol.java hadoop/common/branches/HDFS-5535/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/protocolPB/QJournalProtocolServerSideTranslatorPB.java hadoop/common/branches/HDFS-5535/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/protocolPB/QJournalProtocolTranslatorPB.java hadoop/common/branches/HDFS-5535/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/server/Journal.java hadoop/common/branches/HDFS-5535/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/server/JournalNode.java hadoop/common/branches/HDFS-5535/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/server/JournalNodeRpcServer.java hadoop/common/branches/HDFS-5535/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/BackupJournalManager.java hadoop/common/branches/HDFS-5535/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLog.java hadoop/common/branches/HDFS-5535/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImage.java hadoop/common/branches/HDFS-5535/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FileJournalManager.java hadoop/common/branches/HDFS-5535/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/JournalManager.java hadoop/common/branches/HDFS-5535/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/JournalSet.java hadoop/common/branches/HDFS-5535/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NNStorage.java hadoop/common/branches/HDFS-5535/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NNStorageRetentionManager.java hadoop/common/branches/HDFS-5535/hadoop-hdfs-project/hadoop-hdfs/src/main/proto/QJournalProtocol.proto hadoop/common/branches/HDFS-5535/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestGenericJournalConf.java hadoop/common/branches/HDFS-5535/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestSaveNamespace.java Modified: hadoop/common/branches/HDFS-5535/hadoop-hdfs-project/hadoop-hdfs/CHANGES_HDFS-5535.txt URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-5535/hadoop-hdfs-project/hadoop-hdfs/CHANGES_HDFS-5535.txt?rev=1568563&r1=1568562&r2=1568563&view=diff ============================================================================== --- hadoop/common/branches/HDFS-5535/hadoop-hdfs-project/hadoop-hdfs/CHANGES_HDFS-5535.txt (original) +++ hadoop/common/branches/HDFS-5535/hadoop-hdfs-project/hadoop-hdfs/CHANGES_HDFS-5535.txt Sat Feb 15 00:28:21 2014 @@ -40,3 +40,6 @@ HDFS-5535 subtasks: break after the merge. (Jing Zhao via Arpit Agarwal) HDFS-5585. Provide admin commands for data node upgrade (kihwal) + + HDFS-5920. Support rollback of rolling upgrade in NameNode and JournalNodes. + (jing9) Modified: hadoop/common/branches/HDFS-5535/hadoop-hdfs-project/hadoop-hdfs/src/contrib/bkjournal/src/main/java/org/apache/hadoop/contrib/bkjournal/BookKeeperJournalManager.java URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-5535/hadoop-hdfs-project/hadoop-hdfs/src/contrib/bkjournal/src/main/java/org/apache/hadoop/contrib/bkjournal/BookKeeperJournalManager.java?rev=1568563&r1=1568562&r2=1568563&view=diff ============================================================================== --- hadoop/common/branches/HDFS-5535/hadoop-hdfs-project/hadoop-hdfs/src/contrib/bkjournal/src/main/java/org/apache/hadoop/contrib/bkjournal/BookKeeperJournalManager.java (original) +++ hadoop/common/branches/HDFS-5535/hadoop-hdfs-project/hadoop-hdfs/src/contrib/bkjournal/src/main/java/org/apache/hadoop/contrib/bkjournal/BookKeeperJournalManager.java Sat Feb 15 00:28:21 2014 @@ -693,6 +693,11 @@ public class BookKeeperJournalManager im } @Override + public void discardSegments(long startTxId) throws IOException { + throw new UnsupportedOperationException(); + } + + @Override public void close() throws IOException { try { bkc.close(); Modified: hadoop/common/branches/HDFS-5535/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/client/AsyncLogger.java URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-5535/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/client/AsyncLogger.java?rev=1568563&r1=1568562&r2=1568563&view=diff ============================================================================== --- hadoop/common/branches/HDFS-5535/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/client/AsyncLogger.java (original) +++ hadoop/common/branches/HDFS-5535/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/client/AsyncLogger.java Sat Feb 15 00:28:21 2014 @@ -164,5 +164,7 @@ interface AsyncLogger { public ListenableFuture<Void> doRollback(); + public ListenableFuture<Void> discardSegments(long startTxId); + public ListenableFuture<Long> getJournalCTime(); } Modified: hadoop/common/branches/HDFS-5535/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/client/AsyncLoggerSet.java URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-5535/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/client/AsyncLoggerSet.java?rev=1568563&r1=1568562&r2=1568563&view=diff ============================================================================== --- hadoop/common/branches/HDFS-5535/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/client/AsyncLoggerSet.java (original) +++ hadoop/common/branches/HDFS-5535/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/client/AsyncLoggerSet.java Sat Feb 15 00:28:21 2014 @@ -366,6 +366,15 @@ class AsyncLoggerSet { return QuorumCall.create(calls); } + public QuorumCall<AsyncLogger, Void> discardSegments(long startTxId) { + Map<AsyncLogger, ListenableFuture<Void>> calls = Maps.newHashMap(); + for (AsyncLogger logger : loggers) { + ListenableFuture<Void> future = logger.discardSegments(startTxId); + calls.put(logger, future); + } + return QuorumCall.create(calls); + } + public QuorumCall<AsyncLogger, Long> getJournalCTime() { Map<AsyncLogger, ListenableFuture<Long>> calls = Maps.newHashMap(); Modified: hadoop/common/branches/HDFS-5535/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/client/IPCLoggerChannel.java URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-5535/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/client/IPCLoggerChannel.java?rev=1568563&r1=1568562&r2=1568563&view=diff ============================================================================== --- hadoop/common/branches/HDFS-5535/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/client/IPCLoggerChannel.java (original) +++ hadoop/common/branches/HDFS-5535/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/client/IPCLoggerChannel.java Sat Feb 15 00:28:21 2014 @@ -621,7 +621,18 @@ public class IPCLoggerChannel implements } }); } - + + @Override + public ListenableFuture<Void> discardSegments(final long startTxId) { + return executor.submit(new Callable<Void>() { + @Override + public Void call() throws IOException { + getProxy().discardSegments(journalId, startTxId); + return null; + } + }); + } + @Override public ListenableFuture<Long> getJournalCTime() { return executor.submit(new Callable<Long>() { Modified: hadoop/common/branches/HDFS-5535/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/client/QuorumJournalManager.java URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-5535/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/client/QuorumJournalManager.java?rev=1568563&r1=1568562&r2=1568563&view=diff ============================================================================== --- hadoop/common/branches/HDFS-5535/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/client/QuorumJournalManager.java (original) +++ hadoop/common/branches/HDFS-5535/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/client/QuorumJournalManager.java Sat Feb 15 00:28:21 2014 @@ -86,6 +86,7 @@ public class QuorumJournalManager implem private static final int FINALIZE_TIMEOUT_MS = 60000; private static final int PRE_UPGRADE_TIMEOUT_MS = 60000; private static final int ROLL_BACK_TIMEOUT_MS = 60000; + private static final int DISCARD_SEGMENTS_TIMEOUT_MS = 60000; private static final int UPGRADE_TIMEOUT_MS = 60000; private static final int GET_JOURNAL_CTIME_TIMEOUT_MS = 60000; @@ -601,6 +602,25 @@ public class QuorumJournalManager implem } @Override + public void discardSegments(long startTxId) throws IOException { + QuorumCall<AsyncLogger, Void> call = loggers.discardSegments(startTxId); + try { + call.waitFor(loggers.size(), loggers.size(), 0, + DISCARD_SEGMENTS_TIMEOUT_MS, "discardSegments"); + if (call.countExceptions() > 0) { + call.rethrowException( + "Could not perform discardSegments of one or more JournalNodes"); + } + } catch (InterruptedException e) { + throw new IOException( + "Interrupted waiting for discardSegments() response"); + } catch (TimeoutException e) { + throw new IOException( + "Timed out waiting for discardSegments() response"); + } + } + + @Override public long getJournalCTime() throws IOException { QuorumCall<AsyncLogger, Long> call = loggers.getJournalCTime(); try { Modified: hadoop/common/branches/HDFS-5535/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/protocol/QJournalProtocol.java URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-5535/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/protocol/QJournalProtocol.java?rev=1568563&r1=1568562&r2=1568563&view=diff ============================================================================== --- hadoop/common/branches/HDFS-5535/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/protocol/QJournalProtocol.java (original) +++ hadoop/common/branches/HDFS-5535/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/protocol/QJournalProtocol.java Sat Feb 15 00:28:21 2014 @@ -32,6 +32,7 @@ import org.apache.hadoop.hdfs.qjournal.s import org.apache.hadoop.hdfs.server.common.StorageInfo; import org.apache.hadoop.hdfs.server.namenode.JournalManager; import org.apache.hadoop.hdfs.server.protocol.NamespaceInfo; +import org.apache.hadoop.io.retry.Idempotent; import org.apache.hadoop.security.KerberosInfo; /** @@ -156,5 +157,13 @@ public interface QJournalProtocol { public void doRollback(String journalId) throws IOException; + /** + * Discard journal segments whose first TxId is greater than or equal to the + * given txid. + */ + @Idempotent + public void discardSegments(String journalId, long startTxId) + throws IOException; + public Long getJournalCTime(String journalId) throws IOException; } Modified: hadoop/common/branches/HDFS-5535/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/protocolPB/QJournalProtocolServerSideTranslatorPB.java URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-5535/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/protocolPB/QJournalProtocolServerSideTranslatorPB.java?rev=1568563&r1=1568562&r2=1568563&view=diff ============================================================================== --- hadoop/common/branches/HDFS-5535/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/protocolPB/QJournalProtocolServerSideTranslatorPB.java (original) +++ hadoop/common/branches/HDFS-5535/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/protocolPB/QJournalProtocolServerSideTranslatorPB.java Sat Feb 15 00:28:21 2014 @@ -30,6 +30,8 @@ import org.apache.hadoop.hdfs.qjournal.p import org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocolProtos.AcceptRecoveryResponseProto; import org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocolProtos.CanRollBackRequestProto; import org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocolProtos.CanRollBackResponseProto; +import org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocolProtos.DiscardSegmentsRequestProto; +import org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocolProtos.DiscardSegmentsResponseProto; import org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocolProtos.DoFinalizeRequestProto; import org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocolProtos.DoFinalizeResponseProto; import org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocolProtos.DoPreUpgradeRequestProto; @@ -64,8 +66,8 @@ import org.apache.hadoop.hdfs.qjournal.p import org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocolProtos.StartLogSegmentRequestProto; import org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocolProtos.StartLogSegmentResponseProto; import org.apache.hadoop.hdfs.qjournal.protocol.RequestInfo; -import org.apache.hadoop.hdfs.server.common.StorageInfo; import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.NodeType; +import org.apache.hadoop.hdfs.server.common.StorageInfo; import org.apache.hadoop.hdfs.server.protocol.JournalProtocol; import com.google.protobuf.RpcController; @@ -324,6 +326,18 @@ public class QJournalProtocolServerSideT } @Override + public DiscardSegmentsResponseProto discardSegments( + RpcController controller, DiscardSegmentsRequestProto request) + throws ServiceException { + try { + impl.discardSegments(convert(request.getJid()), request.getStartTxId()); + return DiscardSegmentsResponseProto.getDefaultInstance(); + } catch (IOException e) { + throw new ServiceException(e); + } + } + + @Override public GetJournalCTimeResponseProto getJournalCTime(RpcController controller, GetJournalCTimeRequestProto request) throws ServiceException { try { Modified: hadoop/common/branches/HDFS-5535/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/protocolPB/QJournalProtocolTranslatorPB.java URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-5535/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/protocolPB/QJournalProtocolTranslatorPB.java?rev=1568563&r1=1568562&r2=1568563&view=diff ============================================================================== --- hadoop/common/branches/HDFS-5535/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/protocolPB/QJournalProtocolTranslatorPB.java (original) +++ hadoop/common/branches/HDFS-5535/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/protocolPB/QJournalProtocolTranslatorPB.java Sat Feb 15 00:28:21 2014 @@ -30,6 +30,7 @@ import org.apache.hadoop.hdfs.qjournal.p import org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocolProtos.AcceptRecoveryRequestProto; import org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocolProtos.CanRollBackRequestProto; import org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocolProtos.CanRollBackResponseProto; +import org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocolProtos.DiscardSegmentsRequestProto; import org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocolProtos.DoFinalizeRequestProto; import org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocolProtos.DoPreUpgradeRequestProto; import org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocolProtos.DoRollbackRequestProto; @@ -355,6 +356,19 @@ public class QJournalProtocolTranslatorP } @Override + public void discardSegments(String journalId, long startTxId) + throws IOException { + try { + rpcProxy.discardSegments(NULL_CONTROLLER, + DiscardSegmentsRequestProto.newBuilder() + .setJid(convertJournalId(journalId)).setStartTxId(startTxId) + .build()); + } catch (ServiceException e) { + throw ProtobufHelper.getRemoteException(e); + } + } + + @Override public Long getJournalCTime(String journalId) throws IOException { try { GetJournalCTimeResponseProto response = rpcProxy.getJournalCTime( Modified: hadoop/common/branches/HDFS-5535/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/server/Journal.java URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-5535/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/server/Journal.java?rev=1568563&r1=1568562&r2=1568563&view=diff ============================================================================== --- hadoop/common/branches/HDFS-5535/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/server/Journal.java (original) +++ hadoop/common/branches/HDFS-5535/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/server/Journal.java Sat Feb 15 00:28:21 2014 @@ -1037,6 +1037,12 @@ public class Journal implements Closeabl storage.getJournalManager().doRollback(); } + public void discardSegments(long startTxId) throws IOException { + storage.getJournalManager().discardSegments(startTxId); + // we delete all the segments after the startTxId. let's reset committedTxnId + committedTxnId.set(startTxId - 1); + } + public Long getJournalCTime() throws IOException { return storage.getJournalManager().getJournalCTime(); } Modified: hadoop/common/branches/HDFS-5535/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/server/JournalNode.java URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-5535/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/server/JournalNode.java?rev=1568563&r1=1568562&r2=1568563&view=diff ============================================================================== --- hadoop/common/branches/HDFS-5535/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/server/JournalNode.java (original) +++ hadoop/common/branches/HDFS-5535/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/server/JournalNode.java Sat Feb 15 00:28:21 2014 @@ -309,6 +309,11 @@ public class JournalNode implements Tool getOrCreateJournal(journalId).doRollback(); } + public void discardSegments(String journalId, long startTxId) + throws IOException { + getOrCreateJournal(journalId).discardSegments(startTxId); + } + public Long getJournalCTime(String journalId) throws IOException { return getOrCreateJournal(journalId).getJournalCTime(); } Modified: hadoop/common/branches/HDFS-5535/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/server/JournalNodeRpcServer.java URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-5535/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/server/JournalNodeRpcServer.java?rev=1568563&r1=1568562&r2=1568563&view=diff ============================================================================== --- hadoop/common/branches/HDFS-5535/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/server/JournalNodeRpcServer.java (original) +++ hadoop/common/branches/HDFS-5535/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/server/JournalNodeRpcServer.java Sat Feb 15 00:28:21 2014 @@ -234,6 +234,12 @@ class JournalNodeRpcServer implements QJ } @Override + public void discardSegments(String journalId, long startTxId) + throws IOException { + jn.discardSegments(journalId, startTxId); + } + + @Override public Long getJournalCTime(String journalId) throws IOException { return jn.getJournalCTime(journalId); } Modified: hadoop/common/branches/HDFS-5535/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/BackupJournalManager.java URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-5535/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/BackupJournalManager.java?rev=1568563&r1=1568562&r2=1568563&view=diff ============================================================================== --- hadoop/common/branches/HDFS-5535/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/BackupJournalManager.java (original) +++ hadoop/common/branches/HDFS-5535/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/BackupJournalManager.java Sat Feb 15 00:28:21 2014 @@ -127,6 +127,11 @@ class BackupJournalManager implements Jo } @Override + public void discardSegments(long startTxId) throws IOException { + throw new UnsupportedOperationException(); + } + + @Override public long getJournalCTime() throws IOException { throw new UnsupportedOperationException(); } Modified: hadoop/common/branches/HDFS-5535/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLog.java URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-5535/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLog.java?rev=1568563&r1=1568562&r2=1568563&view=diff ============================================================================== --- hadoop/common/branches/HDFS-5535/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLog.java (original) +++ hadoop/common/branches/HDFS-5535/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLog.java Sat Feb 15 00:28:21 2014 @@ -1406,7 +1406,14 @@ public class FSEditLog implements LogsPu } } } - + + public synchronized void discardSegments(long markerTxid) + throws IOException { + for (JournalAndStream jas : journalSet.getAllJournalStreams()) { + jas.getManager().discardSegments(markerTxid); + } + } + @Override public void selectInputStreams(Collection<EditLogInputStream> streams, long fromTxId, boolean inProgressOk) throws IOException { Modified: hadoop/common/branches/HDFS-5535/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImage.java URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-5535/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImage.java?rev=1568563&r1=1568562&r2=1568563&view=diff ============================================================================== --- hadoop/common/branches/HDFS-5535/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImage.java (original) +++ hadoop/common/branches/HDFS-5535/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImage.java Sat Feb 15 00:28:21 2014 @@ -543,18 +543,14 @@ public class FSImage implements Closeabl private boolean loadFSImage(FSNamesystem target, StartupOption startOpt, MetaRecoveryContext recovery) throws IOException { - final NameNodeFile nnf; - if (startOpt == StartupOption.ROLLINGUPGRADE - && startOpt.getRollingUpgradeStartupOption() - == RollingUpgradeStartupOption.ROLLBACK) { - nnf = NameNodeFile.IMAGE_ROLLBACK; - } else { - nnf = NameNodeFile.IMAGE; - } + final boolean rollingRollback = startOpt == StartupOption.ROLLINGUPGRADE + && startOpt.getRollingUpgradeStartupOption() == + RollingUpgradeStartupOption.ROLLBACK; + final NameNodeFile nnf = rollingRollback ? NameNodeFile.IMAGE_ROLLBACK + : NameNodeFile.IMAGE; final FSImageStorageInspector inspector = storage.readAndInspectDirs(nnf); isUpgradeFinalized = inspector.isUpgradeFinalized(); - List<FSImageFile> imageFiles = inspector.getLatestImages(); StartupProgress prog = NameNode.getStartupProgress(); @@ -573,7 +569,17 @@ public class FSImage implements Closeabl // If we're open for write, we're either non-HA or we're the active NN, so // we better be able to load all the edits. If we're the standby NN, it's // OK to not be able to read all of edits right now. - long toAtLeastTxId = editLog.isOpenForWrite() ? inspector.getMaxSeenTxId() : 0; + // In the meanwhile, for HA upgrade, we will still write editlog thus need + // this toAtLeastTxId to be set to the max-seen txid + // For rollback in rolling upgrade, we need to set the toAtLeastTxId to + // the txid right before the upgrade marker. + long toAtLeastTxId = editLog.isOpenForWrite() ? inspector + .getMaxSeenTxId() : 0; + if (rollingRollback) { + // note that the first image in imageFiles is the special checkpoint + // for the rolling upgrade + toAtLeastTxId = imageFiles.get(0).getCheckpointTxId() + 2; + } editStreams = editLog.selectInputStreams( imageFiles.get(0).getCheckpointTxId() + 1, toAtLeastTxId, recovery, false); @@ -581,8 +587,7 @@ public class FSImage implements Closeabl editStreams = FSImagePreTransactionalStorageInspector .getEditLogStreams(storage); } - int maxOpSize = conf.getInt(DFSConfigKeys. - DFS_NAMENODE_MAX_OP_SIZE_KEY, + int maxOpSize = conf.getInt(DFSConfigKeys.DFS_NAMENODE_MAX_OP_SIZE_KEY, DFSConfigKeys.DFS_NAMENODE_MAX_OP_SIZE_DEFAULT); for (EditLogInputStream elis : editStreams) { elis.setMaxOpSize(maxOpSize); @@ -613,13 +618,34 @@ public class FSImage implements Closeabl throw new IOException("Failed to load an FSImage file!"); } prog.endPhase(Phase.LOADING_FSIMAGE); - long txnsAdvanced = loadEdits(editStreams, target, startOpt, recovery); - needToSave |= needsResaveBasedOnStaleCheckpoint(imageFile.getFile(), - txnsAdvanced); + long txnsAdvanced = 0; + + loadEdits(editStreams, target, startOpt, recovery); + if (rollingRollback) { + // Trigger the rollback for rolling upgrade. + // Here lastAppliedTxId == (markerTxId - 1), and we should decrease 1 from + // lastAppliedTxId for the start-segment transaction. + rollingRollback(lastAppliedTxId--, imageFiles.get(0).getCheckpointTxId()); + needToSave = false; + } else { + needToSave |= needsResaveBasedOnStaleCheckpoint(imageFile.getFile(), + txnsAdvanced); + } editLog.setNextTxId(lastAppliedTxId + 1); return needToSave; } + /** rollback for rolling upgrade. */ + private void rollingRollback(long discardSegmentTxId, long ckptId) + throws IOException { + // discard discard unnecessary editlog segments starting from the given id + this.editLog.discardSegments(discardSegmentTxId); + // rename the special checkpoint + renameCheckpoint(ckptId, NameNodeFile.IMAGE_ROLLBACK, NameNodeFile.IMAGE); + // purge all the checkpoints after the marker + archivalManager.purgeCheckpoinsAfter(NameNodeFile.IMAGE, ckptId); + } + void loadFSImageFile(FSNamesystem target, MetaRecoveryContext recovery, FSImageFile imageFile) throws IOException { LOG.debug("Planning to load image :\n" + imageFile); @@ -707,7 +733,7 @@ public class FSImage implements Closeabl private long loadEdits(Iterable<EditLogInputStream> editStreams, FSNamesystem target, StartupOption startOpt, MetaRecoveryContext recovery) - throws IOException { + throws IOException { LOG.debug("About to load edits:\n " + Joiner.on("\n ").join(editStreams)); StartupProgress prog = NameNode.getStartupProgress(); prog.beginPhase(Phase.LOADING_EDITS); @@ -727,15 +753,19 @@ public class FSImage implements Closeabl // have been successfully applied before the error. lastAppliedTxId = loader.getLastAppliedTxId(); } + boolean rollingRollback = startOpt == StartupOption.ROLLINGUPGRADE && + startOpt.getRollingUpgradeStartupOption() == + RollingUpgradeStartupOption.ROLLBACK; // If we are in recovery mode, we may have skipped over some txids. - if (editIn.getLastTxId() != HdfsConstants.INVALID_TXID) { + if (editIn.getLastTxId() != HdfsConstants.INVALID_TXID + && !rollingRollback) { lastAppliedTxId = editIn.getLastTxId(); } } } finally { FSEditLog.closeAllStreams(editStreams); // update the counts - updateCountForQuota(target.dir.rootDir); + updateCountForQuota(target.dir.rootDir); } prog.endPhase(Phase.LOADING_EDITS); return lastAppliedTxId - prevLastAppliedTxId; @@ -836,11 +866,11 @@ public class FSImage implements Closeabl /** * Save the contents of the FS image to the file. */ - void saveFSImage(SaveNamespaceContext context, StorageDirectory sd) - throws IOException { + void saveFSImage(SaveNamespaceContext context, StorageDirectory sd, + NameNodeFile dstType) throws IOException { long txid = context.getTxId(); File newFile = NNStorage.getStorageFile(sd, NameNodeFile.IMAGE_NEW, txid); - File dstFile = NNStorage.getStorageFile(sd, NameNodeFile.IMAGE, txid); + File dstFile = NNStorage.getStorageFile(sd, dstType, txid); FSImageFormatProtobuf.Saver saver = new FSImageFormatProtobuf.Saver(context); FSImageCompression compression = FSImageCompression.createCompression(conf); @@ -864,16 +894,19 @@ public class FSImage implements Closeabl private class FSImageSaver implements Runnable { private final SaveNamespaceContext context; private StorageDirectory sd; + private final NameNodeFile nnf; - public FSImageSaver(SaveNamespaceContext context, StorageDirectory sd) { + public FSImageSaver(SaveNamespaceContext context, StorageDirectory sd, + NameNodeFile nnf) { this.context = context; this.sd = sd; + this.nnf = nnf; } @Override public void run() { try { - saveFSImage(context, sd); + saveFSImage(context, sd, nnf); } catch (SaveNamespaceCancelledException snce) { LOG.info("Cancelled image saving for " + sd.getRoot() + ": " + snce.getMessage()); @@ -971,7 +1004,7 @@ public class FSImage implements Closeabl for (Iterator<StorageDirectory> it = storage.dirIterator(NameNodeDirType.IMAGE); it.hasNext();) { StorageDirectory sd = it.next(); - FSImageSaver saver = new FSImageSaver(ctx, sd); + FSImageSaver saver = new FSImageSaver(ctx, sd, nnf); Thread saveThread = new Thread(saver, saver.toString()); saveThreads.add(saveThread); saveThread.start(); Modified: hadoop/common/branches/HDFS-5535/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FileJournalManager.java URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-5535/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FileJournalManager.java?rev=1568563&r1=1568562&r2=1568563&view=diff ============================================================================== --- hadoop/common/branches/HDFS-5535/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FileJournalManager.java (original) +++ hadoop/common/branches/HDFS-5535/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FileJournalManager.java Sat Feb 15 00:28:21 2014 @@ -198,6 +198,32 @@ public class FileJournalManager implemen return ret; } + + /** + * Discard all editlog segments whose first txid is greater than or equal to + * the given txid, by renaming them with suffix ".trash". + */ + private void discardEditLogSegments(long startTxId) throws IOException { + File currentDir = sd.getCurrentDir(); + List<EditLogFile> allLogFiles = matchEditLogs(currentDir); + List<EditLogFile> toTrash = Lists.newArrayList(); + LOG.info("Discard the EditLog files, the given start txid is " + startTxId); + // go through the editlog files to make sure the startTxId is right at the + // segment boundary + for (EditLogFile elf : allLogFiles) { + if (elf.getFirstTxId() >= startTxId) { + toTrash.add(elf); + } else { + Preconditions.checkState(elf.getLastTxId() < startTxId); + } + } + + for (EditLogFile elf : toTrash) { + // rename these editlog file as .trash + elf.moveAsideTrashFile(startTxId); + LOG.info("Trash the EditLog file " + elf); + } + } /** * returns matching edit logs via the log directory. Simple helper function @@ -467,6 +493,11 @@ public class FileJournalManager implemen renameSelf(".corrupt"); } + void moveAsideTrashFile(long markerTxid) throws IOException { + assert this.getFirstTxId() >= markerTxid; + renameSelf(".trash"); + } + public void moveAsideEmptyFile() throws IOException { assert lastTxId == HdfsConstants.INVALID_TXID; renameSelf(".empty"); @@ -531,6 +562,11 @@ public class FileJournalManager implemen } @Override + public void discardSegments(long startTxid) throws IOException { + discardEditLogSegments(startTxid); + } + + @Override public long getJournalCTime() throws IOException { StorageInfo sInfo = new StorageInfo(NodeType.NAME_NODE); sInfo.readProperties(sd); Modified: hadoop/common/branches/HDFS-5535/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/JournalManager.java URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-5535/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/JournalManager.java?rev=1568563&r1=1568562&r2=1568563&view=diff ============================================================================== --- hadoop/common/branches/HDFS-5535/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/JournalManager.java (original) +++ hadoop/common/branches/HDFS-5535/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/JournalManager.java Sat Feb 15 00:28:21 2014 @@ -109,7 +109,15 @@ public interface JournalManager extends * roll back their state should just return without error. */ void doRollback() throws IOException; - + + /** + * Discard the segments whose first txid is >= the given txid. + * @param startTxId The given txid should be right at the segment boundary, + * i.e., it should be the first txid of some segment, if segment corresponding + * to the txid exists. + */ + void discardSegments(long startTxId) throws IOException; + /** * @return the CTime of the journal manager. */ Modified: hadoop/common/branches/HDFS-5535/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/JournalSet.java URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-5535/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/JournalSet.java?rev=1568563&r1=1568562&r2=1568563&view=diff ============================================================================== --- hadoop/common/branches/HDFS-5535/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/JournalSet.java (original) +++ hadoop/common/branches/HDFS-5535/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/JournalSet.java Sat Feb 15 00:28:21 2014 @@ -700,6 +700,12 @@ public class JournalSet implements Journ } @Override + public void discardSegments(long startTxId) throws IOException { + // This operation is handled by FSEditLog directly. + throw new UnsupportedOperationException(); + } + + @Override public long getJournalCTime() throws IOException { // This operation is handled by FSEditLog directly. throw new UnsupportedOperationException(); Modified: hadoop/common/branches/HDFS-5535/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NNStorage.java URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-5535/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NNStorage.java?rev=1568563&r1=1568562&r2=1568563&view=diff ============================================================================== --- hadoop/common/branches/HDFS-5535/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NNStorage.java (original) +++ hadoop/common/branches/HDFS-5535/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NNStorage.java Sat Feb 15 00:28:21 2014 @@ -89,7 +89,8 @@ public class NNStorage extends Storage i * or of type EDITS which stores edits or of type IMAGE_AND_EDITS which * stores both fsimage and edits. */ - static enum NameNodeDirType implements StorageDirType { + @VisibleForTesting + public static enum NameNodeDirType implements StorageDirType { UNDEFINED, IMAGE, EDITS, @@ -657,20 +658,27 @@ public class NNStorage extends Storage i @VisibleForTesting public static String getCheckpointImageFileName(long txid) { - return String.format("%s_%019d", - NameNodeFile.IMAGE_NEW.getName(), txid); + return getNameNodeFileName(NameNodeFile.IMAGE_NEW, txid); } @VisibleForTesting public static String getImageFileName(long txid) { - return String.format("%s_%019d", - NameNodeFile.IMAGE.getName(), txid); + return getNameNodeFileName(NameNodeFile.IMAGE, txid); } - + + @VisibleForTesting + public static String getRollbackImageFileName(long txid) { + return getNameNodeFileName(NameNodeFile.IMAGE_ROLLBACK, txid); + } + + @VisibleForTesting + private static String getNameNodeFileName(NameNodeFile nnf, long txid) { + return String.format("%s_%019d", nnf.getName(), txid); + } + @VisibleForTesting public static String getInProgressEditsFileName(long startTxId) { - return String.format("%s_%019d", NameNodeFile.EDITS_INPROGRESS.getName(), - startTxId); + return getNameNodeFileName(NameNodeFile.EDITS_INPROGRESS, startTxId); } static File getInProgressEditsFile(StorageDirectory sd, long startTxId) { Modified: hadoop/common/branches/HDFS-5535/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NNStorageRetentionManager.java URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-5535/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NNStorageRetentionManager.java?rev=1568563&r1=1568562&r2=1568563&view=diff ============================================================================== --- hadoop/common/branches/HDFS-5535/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NNStorageRetentionManager.java (original) +++ hadoop/common/branches/HDFS-5535/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NNStorageRetentionManager.java Sat Feb 15 00:28:21 2014 @@ -90,11 +90,18 @@ public class NNStorageRetentionManager { } void purgeCheckpoints(NameNodeFile nnf) throws IOException { + purgeCheckpoinsAfter(nnf, -1); + } + + void purgeCheckpoinsAfter(NameNodeFile nnf, long fromTxId) + throws IOException { FSImageTransactionalStorageInspector inspector = new FSImageTransactionalStorageInspector(nnf); storage.inspectStorageDirs(inspector); for (FSImageFile image : inspector.getFoundImages()) { - purger.purgeImage(image); + if (image.getCheckpointTxId() > fromTxId) { + purger.purgeImage(image); + } } } Modified: hadoop/common/branches/HDFS-5535/hadoop-hdfs-project/hadoop-hdfs/src/main/proto/QJournalProtocol.proto URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-5535/hadoop-hdfs-project/hadoop-hdfs/src/main/proto/QJournalProtocol.proto?rev=1568563&r1=1568562&r2=1568563&view=diff ============================================================================== --- hadoop/common/branches/HDFS-5535/hadoop-hdfs-project/hadoop-hdfs/src/main/proto/QJournalProtocol.proto (original) +++ hadoop/common/branches/HDFS-5535/hadoop-hdfs-project/hadoop-hdfs/src/main/proto/QJournalProtocol.proto Sat Feb 15 00:28:21 2014 @@ -200,6 +200,17 @@ message DoRollbackResponseProto { } /** + * discardSegments() + */ +message DiscardSegmentsRequestProto { + required JournalIdProto jid = 1; + required uint64 startTxId = 2; +} + +message DiscardSegmentsResponseProto { +} + +/** * getJournalState() */ message GetJournalStateRequestProto { @@ -314,6 +325,8 @@ service QJournalProtocolService { rpc doRollback(DoRollbackRequestProto) returns (DoRollbackResponseProto); + rpc discardSegments(DiscardSegmentsRequestProto) returns (DiscardSegmentsResponseProto); + rpc getJournalState(GetJournalStateRequestProto) returns (GetJournalStateResponseProto); rpc newEpoch(NewEpochRequestProto) returns (NewEpochResponseProto); Added: hadoop/common/branches/HDFS-5535/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestRollingUpgradeRollback.java URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-5535/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestRollingUpgradeRollback.java?rev=1568563&view=auto ============================================================================== --- hadoop/common/branches/HDFS-5535/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestRollingUpgradeRollback.java (added) +++ hadoop/common/branches/HDFS-5535/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestRollingUpgradeRollback.java Sat Feb 15 00:28:21 2014 @@ -0,0 +1,196 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hdfs; + +import java.io.File; +import java.util.List; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hdfs.qjournal.MiniJournalCluster; +import org.apache.hadoop.hdfs.server.namenode.INode; +import org.apache.hadoop.hdfs.server.namenode.NNStorage; +import org.apache.hadoop.hdfs.server.namenode.NameNode; +import org.apache.hadoop.hdfs.tools.DFSAdmin; +import org.junit.Assert; +import org.junit.Test; + +/** + * This class tests rollback for rolling upgrade. + */ +public class TestRollingUpgradeRollback { + + private static final int NUM_JOURNAL_NODES = 3; + private static final String JOURNAL_ID = "myjournal"; + + private static boolean fileExists(List<File> files) { + for (File file : files) { + if (file.exists()) { + return true; + } + } + return false; + } + + private void checkNNStorage(NNStorage storage, long imageTxId, + long trashEndTxId) { + List<File> finalizedEdits = storage.getFiles( + NNStorage.NameNodeDirType.EDITS, + NNStorage.getFinalizedEditsFileName(1, imageTxId)); + Assert.assertTrue(fileExists(finalizedEdits)); + List<File> inprogressEdits = storage.getFiles( + NNStorage.NameNodeDirType.EDITS, + NNStorage.getInProgressEditsFileName(imageTxId + 1)); + // For rollback case we will have an inprogress file for future transactions + Assert.assertTrue(fileExists(inprogressEdits)); + if (trashEndTxId > 0) { + List<File> trashedEdits = storage.getFiles( + NNStorage.NameNodeDirType.EDITS, + NNStorage.getFinalizedEditsFileName(imageTxId + 1, trashEndTxId) + + ".trash"); + Assert.assertTrue(fileExists(trashedEdits)); + } + String imageFileName = trashEndTxId > 0 ? NNStorage + .getImageFileName(imageTxId) : NNStorage + .getRollbackImageFileName(imageTxId); + List<File> imageFiles = storage.getFiles( + NNStorage.NameNodeDirType.IMAGE, imageFileName); + Assert.assertTrue(fileExists(imageFiles)); + } + + private void checkJNStorage(File dir, long discardStartTxId, + long discardEndTxId) { + File finalizedEdits = new File(dir, NNStorage.getFinalizedEditsFileName(1, + discardStartTxId - 1)); + Assert.assertTrue(finalizedEdits.exists()); + File trashEdits = new File(dir, NNStorage.getFinalizedEditsFileName( + discardStartTxId, discardEndTxId) + ".trash"); + Assert.assertTrue(trashEdits.exists()); + } + + @Test + public void testRollbackCommand() throws Exception { + final Configuration conf = new HdfsConfiguration(); + MiniDFSCluster cluster = null; + final Path foo = new Path("/foo"); + final Path bar = new Path("/bar"); + try { + cluster = new MiniDFSCluster.Builder(conf).numDataNodes(0).build(); + cluster.waitActive(); + + final DistributedFileSystem dfs = cluster.getFileSystem(); + final DFSAdmin dfsadmin = new DFSAdmin(conf); + dfs.mkdirs(foo); + + // start rolling upgrade + Assert.assertEquals(0, + dfsadmin.run(new String[] { "-rollingUpgrade", "start" })); + // create new directory + dfs.mkdirs(bar); + + // check NNStorage + NNStorage storage = cluster.getNamesystem().getFSImage().getStorage(); + checkNNStorage(storage, 3, -1); // (startSegment, mkdir, endSegment) + } finally { + if (cluster != null) { + cluster.shutdown(); + } + } + + NameNode nn = null; + try { + nn = NameNode.createNameNode(new String[] { "-rollingUpgrade", + "rollback" }, conf); + // make sure /foo is still there, but /bar is not + INode fooNode = nn.getNamesystem().getFSDirectory() + .getINode4Write(foo.toString()); + Assert.assertNotNull(fooNode); + INode barNode = nn.getNamesystem().getFSDirectory() + .getINode4Write(bar.toString()); + Assert.assertNull(barNode); + + // check the details of NNStorage + NNStorage storage = nn.getNamesystem().getFSImage().getStorage(); + // (startSegment, upgrade marker, mkdir, endSegment) + checkNNStorage(storage, 3, 7); + } finally { + if (nn != null) { + nn.stop(); + nn.join(); + } + } + } + + @Test + public void testRollbackWithQJM() throws Exception { + final Configuration conf = new HdfsConfiguration(); + MiniJournalCluster mjc = null; + MiniDFSCluster cluster = null; + final Path foo = new Path("/foo"); + final Path bar = new Path("/bar"); + + try { + mjc = new MiniJournalCluster.Builder(conf).numJournalNodes( + NUM_JOURNAL_NODES).build(); + conf.set(DFSConfigKeys.DFS_NAMENODE_EDITS_DIR_KEY, mjc + .getQuorumJournalURI(JOURNAL_ID).toString()); + cluster = new MiniDFSCluster.Builder(conf).numDataNodes(0).build(); + cluster.waitActive(); + + DistributedFileSystem dfs = cluster.getFileSystem(); + final DFSAdmin dfsadmin = new DFSAdmin(conf); + dfs.mkdirs(foo); + + // start rolling upgrade + Assert.assertEquals(0, + dfsadmin.run(new String[] { "-rollingUpgrade", "start" })); + // create new directory + dfs.mkdirs(bar); + dfs.close(); + + // rollback + cluster.restartNameNode("-rollingUpgrade", "rollback"); + // make sure /foo is still there, but /bar is not + dfs = cluster.getFileSystem(); + Assert.assertTrue(dfs.exists(foo)); + Assert.assertFalse(dfs.exists(bar)); + + // check storage in JNs + for (int i = 0; i < NUM_JOURNAL_NODES; i++) { + File dir = mjc.getCurrentDir(0, JOURNAL_ID); + // segments:(startSegment, mkdir, endSegment), (startSegment, upgrade + // marker, mkdir, endSegment) + checkJNStorage(dir, 4, 7); + } + } finally { + if (cluster != null) { + cluster.shutdown(); + } + if (mjc != null) { + mjc.shutdown(); + } + } + } + + /** + * TODO: Test rollback scenarios where StandbyNameNode does checkpoints during + * rolling upgrade. + */ + + // TODO: rollback could not succeed in all JN +} \ No newline at end of file Modified: hadoop/common/branches/HDFS-5535/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestGenericJournalConf.java URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-5535/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestGenericJournalConf.java?rev=1568563&r1=1568562&r2=1568563&view=diff ============================================================================== --- hadoop/common/branches/HDFS-5535/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestGenericJournalConf.java (original) +++ hadoop/common/branches/HDFS-5535/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestGenericJournalConf.java Sat Feb 15 00:28:21 2014 @@ -213,6 +213,9 @@ public class TestGenericJournalConf { public void doRollback() throws IOException {} @Override + public void discardSegments(long startTxId) throws IOException {} + + @Override public long getJournalCTime() throws IOException { return -1; } Modified: hadoop/common/branches/HDFS-5535/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestSaveNamespace.java URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-5535/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestSaveNamespace.java?rev=1568563&r1=1568562&r2=1568563&view=diff ============================================================================== --- hadoop/common/branches/HDFS-5535/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestSaveNamespace.java (original) +++ hadoop/common/branches/HDFS-5535/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestSaveNamespace.java Sat Feb 15 00:28:21 2014 @@ -141,7 +141,7 @@ public class TestSaveNamespace { doAnswer(new FaultySaveImage(true)). when(spyImage).saveFSImage( (SaveNamespaceContext)anyObject(), - (StorageDirectory)anyObject()); + (StorageDirectory)anyObject(), (NameNodeFile) anyObject()); shouldFail = false; break; case SAVE_SECOND_FSIMAGE_IOE: @@ -149,7 +149,7 @@ public class TestSaveNamespace { doAnswer(new FaultySaveImage(false)). when(spyImage).saveFSImage( (SaveNamespaceContext)anyObject(), - (StorageDirectory)anyObject()); + (StorageDirectory)anyObject(), (NameNodeFile) anyObject()); shouldFail = false; break; case SAVE_ALL_FSIMAGES: @@ -157,7 +157,7 @@ public class TestSaveNamespace { doThrow(new RuntimeException("Injected")). when(spyImage).saveFSImage( (SaveNamespaceContext)anyObject(), - (StorageDirectory)anyObject()); + (StorageDirectory)anyObject(), (NameNodeFile) anyObject()); shouldFail = true; break; case WRITE_STORAGE_ALL: @@ -382,7 +382,7 @@ public class TestSaveNamespace { doThrow(new IOException("Injected fault: saveFSImage")). when(spyImage).saveFSImage( (SaveNamespaceContext)anyObject(), - (StorageDirectory)anyObject()); + (StorageDirectory)anyObject(), (NameNodeFile) anyObject()); try { doAnEdit(fsn, 1);