Author: todd
Date: Thu Jan 19 22:35:04 2012
New Revision: 1233612
URL: http://svn.apache.org/viewvc?rev=1233612&view=rev
Log:
HDFS-2812. When becoming active, the NN should treat all leases as freshly
renewed. Contributed by Todd Lipcon.
Modified:
hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt
hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/LeaseManager.java
hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/NameNodeAdapter.java
hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHAStateTransitions.java
Modified:
hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt
URL:
http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt?rev=1233612&r1=1233611&r2=1233612&view=diff
==============================================================================
---
hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt
(original)
+++
hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt
Thu Jan 19 22:35:04 2012
@@ -117,3 +117,5 @@ HDFS-2795. Standby NN takes a long time
HDFS-2592. Balancer support for HA namenodes. (Uma Maheswara Rao G via todd)
HDFS-2367. Enable the configuration of multiple HA cluster addresses. (atm)
+
+HDFS-2812. When becoming active, the NN should treat all leases as freshly renewed. (todd)
Modified:
hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
URL:
http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java?rev=1233612&r1=1233611&r2=1233612&view=diff
==============================================================================
---
hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
(original)
+++
hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
Thu Jan 19 22:35:04 2012
@@ -337,6 +337,8 @@ public class FSNamesystem implements Nam
*/
private HAContext haContext;
+ private boolean haEnabled;
+
private final Configuration conf;
PendingDataNodeMessages getPendingDataNodeMessages() {
@@ -545,6 +547,13 @@ public class FSNamesystem implements Nam
if (UserGroupInformation.isSecurityEnabled()) {
startSecretManager();
}
+ if (haEnabled) {
+ // Renew all of the leases before becoming active.
+ // This is because, while we were in standby mode,
+ // the leases weren't getting renewed on this NN.
+ // Give them all a fresh start here.
+ leaseManager.renewAllLeases();
+ }
leaseManager.startMonitor();
} finally {
writeUnlock();
@@ -737,8 +746,8 @@ public class FSNamesystem implements Nam
// block allocation has to be persisted in HA using a shared edits directory
// so that the standby has up-to-date namespace information
String nameserviceId = DFSUtil.getNamenodeNameServiceId(conf);
- this.persistBlocks |= HAUtil.isHAEnabled(conf, nameserviceId) &&
- HAUtil.usesSharedEditsDir(conf);
+ this.haEnabled = HAUtil.isHAEnabled(conf, nameserviceId);
+ this.persistBlocks |= haEnabled && HAUtil.usesSharedEditsDir(conf);
short filePermission =
(short)conf.getInt(DFS_NAMENODE_UPGRADE_PERMISSION_KEY,
DFS_NAMENODE_UPGRADE_PERMISSION_DEFAULT);
Modified:
hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/LeaseManager.java
URL:
http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/LeaseManager.java?rev=1233612&r1=1233611&r2=1233612&view=diff
==============================================================================
---
hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/LeaseManager.java
(original)
+++
hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/LeaseManager.java
Thu Jan 19 22:35:04 2012
@@ -200,6 +200,15 @@ public class LeaseManager {
}
}
+ /**
+ * Renew all of the currently open leases.
+ */
+ synchronized void renewAllLeases() {
+ for (Lease l : leases.values()) {
+ renewLease(l);
+ }
+ }
+
/************************************************************
* A Lease governs all the locks held by a single client.
* For each client there's a corresponding lease, whose
@@ -306,6 +315,11 @@ public class LeaseManager {
paths.remove(oldpath);
paths.add(newpath);
}
+
+ @VisibleForTesting
+ long getLastUpdate() {
+ return lastUpdate;
+ }
}
synchronized void changeLease(String src, String dst,
Modified:
hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/NameNodeAdapter.java
URL:
http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/NameNodeAdapter.java?rev=1233612&r1=1233611&r2=1233612&view=diff
==============================================================================
---
hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/NameNodeAdapter.java
(original)
+++
hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/NameNodeAdapter.java
Thu Jan 19 22:35:04 2012
@@ -28,6 +28,7 @@ import org.apache.hadoop.hdfs.protocol.L
import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenSecretManager;
import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeDescriptor;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.MkdirOp;
+import org.apache.hadoop.hdfs.server.namenode.LeaseManager.Lease;
import org.apache.hadoop.hdfs.server.protocol.DatanodeRegistration;
import org.apache.hadoop.hdfs.server.protocol.HeartbeatResponse;
import org.apache.hadoop.ipc.Server;
@@ -127,6 +128,19 @@ public class NameNodeAdapter {
}
/**
+ * @return the timestamp of the last renewal of the given lease,
+ * or -1 in the case that the lease doesn't exist.
+ */
+ public static long getLeaseRenewalTime(NameNode nn, String path) {
+ LeaseManager lm = nn.getNamesystem().leaseManager;
+ Lease l = lm.getLeaseByPath(path);
+ if (l == null) {
+ return -1;
+ }
+ return l.getLastUpdate();
+ }
+
+ /**
* Return the datanode descriptor for the given datanode.
*/
public static DatanodeDescriptor getDatanode(final FSNamesystem ns,
Modified:
hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHAStateTransitions.java
URL:
http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHAStateTransitions.java?rev=1233612&r1=1233611&r2=1233612&view=diff
==============================================================================
---
hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHAStateTransitions.java
(original)
+++
hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHAStateTransitions.java
Thu Jan 19 22:35:04 2012
@@ -24,15 +24,19 @@ import java.util.concurrent.locks.Reentr
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.DFSTestUtil;
import org.apache.hadoop.hdfs.MiniDFSCluster;
import org.apache.hadoop.hdfs.MiniDFSNNTopology;
+import org.apache.hadoop.hdfs.server.namenode.NameNode;
import org.apache.hadoop.hdfs.server.namenode.NameNodeAdapter;
+import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.test.GenericTestUtils;
import org.apache.hadoop.test.MultithreadedTestUtil.TestContext;
import org.apache.hadoop.test.MultithreadedTestUtil.RepeatingTestThread;
+import org.apache.tools.ant.taskdefs.WaitFor;
import org.junit.Test;
import org.mockito.Mockito;
@@ -45,6 +49,7 @@ public class TestHAStateTransitions {
TestStandbyIsHot.class);
private static final Path TEST_DIR = new Path("/test");
private static final Path TEST_FILE_PATH = new Path(TEST_DIR, "foo");
+ private static final String TEST_FILE_STR = TEST_FILE_PATH.toUri().getPath();
private static final String TEST_FILE_DATA =
"Hello state transitioning world";
@@ -191,4 +196,59 @@ public class TestHAStateTransitions {
cluster.shutdown();
}
}
+
+ /**
+ * Test for HDFS-2812. Since lease renewals go from the client
+ * only to the active NN, the SBN will have out-of-date lease
+ * info when it becomes active. We need to make sure we don't
+ * accidentally mark the leases as expired when the failover
+ * proceeds.
+ */
+ @Test(timeout=120000)
+ public void testLeasesRenewedOnTransition() throws Exception {
+ Configuration conf = new Configuration();
+ MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf)
+ .nnTopology(MiniDFSNNTopology.simpleHATopology())
+ .numDataNodes(1)
+ .build();
+ FSDataOutputStream stm = null;
+ FileSystem fs = HATestUtil.configureFailoverFs(cluster, conf);
+ NameNode nn0 = cluster.getNameNode(0);
+ NameNode nn1 = cluster.getNameNode(1);
+ nn1.getNamesystem().getEditLogTailer().setSleepTime(250);
+ nn1.getNamesystem().getEditLogTailer().interrupt();
+
+ try {
+ cluster.waitActive();
+ cluster.transitionToActive(0);
+
+ LOG.info("Starting with NN 0 active");
+
+ stm = fs.create(TEST_FILE_PATH);
+ long nn0t0 = NameNodeAdapter.getLeaseRenewalTime(nn0, TEST_FILE_STR);
+ assertTrue(nn0t0 > 0);
+ long nn1t0 = NameNodeAdapter.getLeaseRenewalTime(nn1, TEST_FILE_STR);
+ assertEquals("Lease should not yet exist on nn1",
+ -1, nn1t0);
+
+ Thread.sleep(5); // make sure time advances!
+
+ HATestUtil.waitForStandbyToCatchUp(nn0, nn1);
+ long nn1t1 = NameNodeAdapter.getLeaseRenewalTime(nn1, TEST_FILE_STR);
+ assertTrue("Lease should have been created on standby. Time was: " +
+ nn1t1, nn1t1 > nn0t0);
+
+ Thread.sleep(5); // make sure time advances!
+
+ LOG.info("Failing over to NN 1");
+ cluster.transitionToStandby(0);
+ cluster.transitionToActive(1);
+ long nn1t2 = NameNodeAdapter.getLeaseRenewalTime(nn1, TEST_FILE_STR);
+ assertTrue("Lease should have been renewed by failover process",
+ nn1t2 > nn1t1);
+ } finally {
+ IOUtils.closeStream(stm);
+ cluster.shutdown();
+ }
+ }
}