Author: jing9
Date: Wed Mar 5 18:22:04 2014
New Revision: 1574603

URL: http://svn.apache.org/r1574603
Log:
HDFS-5167. Add metrics about the NameNode retry cache. Contributed by Tsuyoshi OZAWA.
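For context, the retry cache that these new metrics instrument is consulted at the start of each non-idempotent NameNode operation: the (ClientId, CallId) pair of the incoming RPC is looked up, and if a completed entry is found the cached outcome is replayed instead of re-executing the operation. The sketch below illustrates that pattern; it is a simplified illustration rather than the actual FSNamesystem code, it assumes the static waitForCompletion/setState helpers on org.apache.hadoop.ipc.RetryCache, and the class name is hypothetical.

import java.io.IOException;

import org.apache.hadoop.ipc.RetryCache;
import org.apache.hadoop.ipc.RetryCache.CacheEntry;

// Hypothetical class illustrating how a retry-cache-guarded operation looks.
public class RetryCacheUsageSketch {
  private final RetryCache retryCache;

  public RetryCacheUsageSketch(RetryCache retryCache) {
    this.retryCache = retryCache;
  }

  void nonIdempotentOp() throws IOException {
    // Look up the (ClientId, CallId) of the current RPC. A retried request
    // finds the completed entry here and the cached outcome is replayed;
    // these replays are what the new CacheHit counter reflects.
    CacheEntry cacheEntry = RetryCache.waitForCompletion(retryCache);
    if (cacheEntry != null && cacheEntry.isSuccess()) {
      return; // replay the previously successful response
    }
    boolean success = false;
    try {
      // ... perform the actual namespace modification here ...
      success = true;
    } finally {
      // Record the outcome so that a later retry can be replayed; entries
      // recorded this way are reflected in the CacheUpdated counter.
      RetryCache.setState(cacheEntry, success);
    }
  }
}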
Added:
    hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNameNodeRetryCacheMetrics.java
Modified:
    hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
    hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
    hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestRetryCacheWithHA.java

Modified: hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt?rev=1574603&r1=1574602&r2=1574603&view=diff
==============================================================================
--- hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt (original)
+++ hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt Wed Mar 5 18:22:04 2014
@@ -519,6 +519,9 @@ Release 2.4.0 - UNRELEASED
 
     HDFS-5321. Clean up the HTTP-related configuration in HDFS (wheat9)
 
+    HDFS-5167. Add metrics about the NameNode retry cache. (Tsuyoshi OZAWA via
+    jing9)
+
   OPTIMIZATIONS
 
     HDFS-5790. LeaseManager.findPath is very slow when many leases need recovery

Modified: hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
URL: http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java?rev=1574603&r1=1574602&r2=1574603&view=diff
==============================================================================
--- hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java (original)
+++ hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java Wed Mar 5 18:22:04 2014
@@ -822,7 +822,7 @@ public class FSNamesystem implements Nam
       retryCache.addCacheEntry(clientId, callId);
     }
   }
-  
+
   @VisibleForTesting
   static RetryCache initRetryCache(Configuration conf) {
     boolean enable = conf.getBoolean(DFS_NAMENODE_ENABLE_RETRY_CACHE_KEY,
@@ -839,7 +839,7 @@ public class FSNamesystem implements Nam
           + " of total heap and retry cache entry expiry time is "
           + entryExpiryMillis + " millis");
       long entryExpiryNanos = entryExpiryMillis * 1000 * 1000;
-      return new RetryCache("Namenode Retry Cache", heapPercent,
+      return new RetryCache("NameNodeRetryCache", heapPercent,
           entryExpiryNanos);
     }
     return null;

Added: hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNameNodeRetryCacheMetrics.java
URL: http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNameNodeRetryCacheMetrics.java?rev=1574603&view=auto
==============================================================================
--- hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNameNodeRetryCacheMetrics.java (added)
+++ hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNameNodeRetryCacheMetrics.java Wed Mar 5 18:22:04 2014
@@ -0,0 +1,113 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hdfs.server.namenode;
+
+import java.io.IOException;
+
+import org.apache.hadoop.hdfs.DFSClient;
+import org.apache.hadoop.hdfs.protocol.HdfsConstants;
+import org.apache.hadoop.hdfs.server.namenode.ha.HATestUtil;
+import org.apache.hadoop.ipc.metrics.RetryCacheMetrics;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hdfs.DFSConfigKeys;
+import org.apache.hadoop.hdfs.DistributedFileSystem;
+import org.apache.hadoop.hdfs.HdfsConfiguration;
+import org.apache.hadoop.hdfs.MiniDFSCluster;
+import org.apache.hadoop.hdfs.MiniDFSNNTopology;
+import org.junit.Before;
+import org.junit.After;
+import org.junit.Test;
+
+import static org.junit.Assert.assertEquals;
+import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_ENABLE_RETRY_CACHE_KEY;
+
+/**
+ * Tests for ensuring the NameNode retry cache metrics work correctly for
+ * non-idempotent requests.
+ *
+ * The retry cache tracks previously received requests by the ClientId and
+ * CallId carried in each RPC request and stores the corresponding response.
+ * The stored response is replayed when the same request is received again on
+ * retry.
+ */
+public class TestNameNodeRetryCacheMetrics {
+  private MiniDFSCluster cluster;
+  private FSNamesystem namesystem;
+  private DistributedFileSystem filesystem;
+  private int namenodeId = 0;
+  private Configuration conf;
+  private RetryCacheMetrics metrics;
+
+  private DFSClient client;
+
+  /** Start a cluster */
+  @Before
+  public void setup() throws Exception {
+    conf = new HdfsConfiguration();
+    conf.setBoolean(DFS_NAMENODE_ENABLE_RETRY_CACHE_KEY, true);
+    conf.setInt(DFSConfigKeys.DFS_CLIENT_TEST_DROP_NAMENODE_RESPONSE_NUM_KEY, 2);
+    cluster = new MiniDFSCluster.Builder(conf)
+        .nnTopology(MiniDFSNNTopology.simpleHATopology()).numDataNodes(3)
+        .build();
+    cluster.waitActive();
+    cluster.transitionToActive(namenodeId);
+    HATestUtil.setFailoverConfigurations(cluster, conf);
+    filesystem = (DistributedFileSystem) HATestUtil.configureFailoverFs(cluster, conf);
+    namesystem = cluster.getNamesystem(namenodeId);
+    metrics = namesystem.getRetryCache().getMetricsForTests();
+  }
+
+  /**
+   * Cleanup after the test
+   * @throws IOException
+   **/
+  @After
+  public void cleanup() throws IOException {
+    if (cluster != null) {
+      cluster.shutdown();
+    }
+  }
+
+  @Test
+  public void testRetryCacheMetrics() throws IOException {
+    checkMetrics(0, 0, 0);
+
+    // DFS_CLIENT_TEST_DROP_NAMENODE_RESPONSE_NUM_KEY is 2, so the client
+    // drops the first 2 responses and retries the request. The retried
+    // requests are answered from the retry cache on the NameNode.
+    trySaveNamespace();
+    checkMetrics(2, 0, 1);
+
+    // RetryCache will be cleared after Namesystem#close()
+    namesystem.close();
+    checkMetrics(2, 1, 1);
+  }
+
+  private void checkMetrics(long hit, long cleared, long updated) {
+    assertEquals("CacheHit", hit, metrics.getCacheHit());
+    assertEquals("CacheCleared", cleared, metrics.getCacheCleared());
+    assertEquals("CacheUpdated", updated, metrics.getCacheUpdated());
+  }
+
+  private void trySaveNamespace() throws IOException {
+    filesystem.setSafeMode(HdfsConstants.SafeModeAction.SAFEMODE_ENTER);
+    filesystem.saveNamespace();
+    filesystem.setSafeMode(HdfsConstants.SafeModeAction.SAFEMODE_LEAVE);
+  }
+
+}

Modified: hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestRetryCacheWithHA.java
URL: http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestRetryCacheWithHA.java?rev=1574603&r1=1574602&r2=1574603&view=diff
==============================================================================
--- hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestRetryCacheWithHA.java (original)
+++ hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestRetryCacheWithHA.java Wed Mar 5 18:22:04 2014
@@ -1181,6 +1181,26 @@ public class TestRetryCacheWithHA {
       LOG.info("Got the result of " + op.name + ": "
           + results.get(op.name));
     }
+
+    // Waiting for failover.
+    while (cluster.getNamesystem(1).isInStandbyState()) {
+      Thread.sleep(10);
+    }
+
+    long hitNN0 = cluster.getNamesystem(0).getRetryCache().getMetricsForTests()
+        .getCacheHit();
+    long hitNN1 = cluster.getNamesystem(1).getRetryCache().getMetricsForTests()
+        .getCacheHit();
+    assertTrue("CacheHit: " + hitNN0 + ", " + hitNN1,
+        hitNN0 + hitNN1 > 0);
+    long updatedNN0 = cluster.getNamesystem(0).getRetryCache()
+        .getMetricsForTests().getCacheUpdated();
+    long updatedNN1 = cluster.getNamesystem(1).getRetryCache()
+        .getMetricsForTests().getCacheUpdated();
+    // CacheUpdated on NN0 should be > 0 since the ops were processed on NN0
+    assertTrue("CacheUpdated on NN0: " + updatedNN0, updatedNN0 > 0);
+    // CacheUpdated on NN1 should be > 0 since NN1 applied the ops from the editlog
+    assertTrue("CacheUpdated on NN1: " + updatedNN1, updatedNN1 > 0);
   }

  /**
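The new tests read the counters through the test-only hook FSNamesystem#getRetryCache().getMetricsForTests(). Because RetryCacheMetrics registers with the default metrics system, the same counters should also be reachable through the normal metrics channels, for example with the MetricsAsserts test helper. The sketch below is hedged: it assumes the record is published as "RetryCache." plus the cache name (hence the "NameNodeRetryCache" rename in FSNamesystem above) and that the counters are exported as CacheHit, CacheCleared, and CacheUpdated; the class name is hypothetical.

import static org.apache.hadoop.test.MetricsAsserts.assertCounter;
import static org.apache.hadoop.test.MetricsAsserts.getLongCounter;
import static org.apache.hadoop.test.MetricsAsserts.getMetrics;

import org.apache.hadoop.metrics2.MetricsRecordBuilder;

// Hypothetical helper showing how the new counters could be read via the
// metrics system instead of the test-only getMetricsForTests() accessor.
public class RetryCacheMetricsQuerySketch {

  // Assumed record name: "RetryCache." + the cache name set in FSNamesystem.
  private static final String RECORD = "RetryCache.NameNodeRetryCache";

  /** Print the three retry cache counters. */
  static void printRetryCacheCounters() {
    MetricsRecordBuilder rb = getMetrics(RECORD);
    System.out.println("CacheHit=" + getLongCounter("CacheHit", rb)
        + " CacheCleared=" + getLongCounter("CacheCleared", rb)
        + " CacheUpdated=" + getLongCounter("CacheUpdated", rb));
  }

  /** Assert on the counters, as a test would. */
  static void assertRetryCacheCounters(long hit, long cleared, long updated) {
    MetricsRecordBuilder rb = getMetrics(RECORD);
    assertCounter("CacheHit", hit, rb);
    assertCounter("CacheCleared", cleared, rb);
    assertCounter("CacheUpdated", updated, rb);
  }
}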