Author: szetszwo Date: Thu Jun 28 02:48:05 2012 New Revision: 1354798 URL: http://svn.apache.org/viewvc?rev=1354798&view=rev Log: HDFS-3518. Add a utility method DistributedFileSystem.isHealthy(uri) for checking if the given HDFS is healthy.
Modified: hadoop/common/branches/branch-1/CHANGES.txt hadoop/common/branches/branch-1/src/hdfs/org/apache/hadoop/hdfs/DistributedFileSystem.java hadoop/common/branches/branch-1/src/test/org/apache/hadoop/hdfs/TestDFSClientRetries.java Modified: hadoop/common/branches/branch-1/CHANGES.txt URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-1/CHANGES.txt?rev=1354798&r1=1354797&r2=1354798&view=diff ============================================================================== --- hadoop/common/branches/branch-1/CHANGES.txt (original) +++ hadoop/common/branches/branch-1/CHANGES.txt Thu Jun 28 02:48:05 2012 @@ -68,6 +68,9 @@ Release 1.1.0 - unreleased HADOOP-8209. Add option to relax build-version check for branch-1. (eli) + HDFS-3518. Add a utility method DistributedFileSystem.isHealthy(uri) for + checking if the given HDFS is healthy. (szetszwo) + IMPROVEMENTS MAPREDUCE-3597. [Rumen] Provide a way to access other info of history file Modified: hadoop/common/branches/branch-1/src/hdfs/org/apache/hadoop/hdfs/DistributedFileSystem.java URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-1/src/hdfs/org/apache/hadoop/hdfs/DistributedFileSystem.java?rev=1354798&r1=1354797&r2=1354798&view=diff ============================================================================== --- hadoop/common/branches/branch-1/src/hdfs/org/apache/hadoop/hdfs/DistributedFileSystem.java (original) +++ hadoop/common/branches/branch-1/src/hdfs/org/apache/hadoop/hdfs/DistributedFileSystem.java Thu Jun 28 02:48:05 2012 @@ -18,29 +18,40 @@ package org.apache.hadoop.hdfs; -import java.io.*; -import java.net.*; +import java.io.FileNotFoundException; +import java.io.IOException; +import java.net.InetSocketAddress; +import java.net.URI; import java.util.ArrayList; -import org.apache.hadoop.fs.permission.FsPermission; -import org.apache.hadoop.fs.*; import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.hdfs.protocol.DatanodeInfo; -import org.apache.hadoop.hdfs.protocol.FSConstants; +import org.apache.hadoop.fs.BlockLocation; +import org.apache.hadoop.fs.ContentSummary; +import org.apache.hadoop.fs.FSDataInputStream; +import org.apache.hadoop.fs.FSDataOutputStream; +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.MD5MD5CRC32FileChecksum; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.permission.FsPermission; import org.apache.hadoop.hdfs.protocol.Block; -import org.apache.hadoop.hdfs.protocol.HdfsFileStatus; -import org.apache.hadoop.hdfs.protocol.LocatedBlock; +import org.apache.hadoop.hdfs.protocol.DatanodeInfo; import org.apache.hadoop.hdfs.protocol.DirectoryListing; +import org.apache.hadoop.hdfs.protocol.FSConstants; import org.apache.hadoop.hdfs.protocol.FSConstants.DatanodeReportType; +import org.apache.hadoop.hdfs.protocol.FSConstants.SafeModeAction; import org.apache.hadoop.hdfs.protocol.FSConstants.UpgradeAction; +import org.apache.hadoop.hdfs.protocol.HdfsFileStatus; +import org.apache.hadoop.hdfs.protocol.LocatedBlock; import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenIdentifier; import org.apache.hadoop.hdfs.server.common.UpgradeStatusReport; import org.apache.hadoop.hdfs.server.namenode.NameNode; -import org.apache.hadoop.hdfs.DFSClient.DFSOutputStream; +import org.apache.hadoop.io.IOUtils; import org.apache.hadoop.io.Text; +import org.apache.hadoop.ipc.Client; import org.apache.hadoop.security.AccessControlException; -import org.apache.hadoop.security.token.Token; import org.apache.hadoop.security.token.SecretManager.InvalidToken; +import org.apache.hadoop.security.token.Token; import org.apache.hadoop.util.Progressable; @@ -626,4 +637,46 @@ public class DistributedFileSystem exten public void setBalancerBandwidth(long bandwidth) throws IOException { dfs.setBalancerBandwidth(bandwidth); } + + /** + * Is the HDFS healthy? + * HDFS is considered as healthy if it is up and not in safemode. + * + * @param uri the HDFS URI. Note that the URI path is ignored. + * @return true if HDFS is healthy; false, otherwise. + */ + public static boolean isHealthy(URI uri) { + //check scheme + final String scheme = uri.getScheme(); + if (!"hdfs".equalsIgnoreCase(scheme)) { + throw new IllegalArgumentException("This scheme is not hdfs, uri=" + uri); + } + + final Configuration conf = new Configuration(); + //disable FileSystem cache + conf.setBoolean(String.format("fs.%s.impl.disable.cache", scheme), true); + //disable client retry for rpc connection and rpc calls + conf.setBoolean(DFSConfigKeys.DFS_CLIENT_RETRY_POLICY_ENABLED_KEY, false); + conf.setInt(Client.IPC_CLIENT_CONNECT_MAX_RETRIES_KEY, 0); + + DistributedFileSystem fs = null; + try { + fs = (DistributedFileSystem)FileSystem.get(uri, conf); + final boolean safemode = fs.setSafeMode(SafeModeAction.SAFEMODE_GET); + if (LOG.isDebugEnabled()) { + LOG.debug("Is namenode in safemode? " + safemode + "; uri=" + uri); + } + + fs.close(); + fs = null; + return !safemode; + } catch(IOException e) { + if (LOG.isDebugEnabled()) { + LOG.debug("Got an exception for uri=" + uri, e); + } + return false; + } finally { + IOUtils.cleanup(LOG, fs); + } + } } Modified: hadoop/common/branches/branch-1/src/test/org/apache/hadoop/hdfs/TestDFSClientRetries.java URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-1/src/test/org/apache/hadoop/hdfs/TestDFSClientRetries.java?rev=1354798&r1=1354797&r2=1354798&view=diff ============================================================================== --- hadoop/common/branches/branch-1/src/test/org/apache/hadoop/hdfs/TestDFSClientRetries.java (original) +++ hadoop/common/branches/branch-1/src/test/org/apache/hadoop/hdfs/TestDFSClientRetries.java Thu Jun 28 02:48:05 2012 @@ -28,6 +28,7 @@ import java.io.InputStream; import java.io.OutputStream; import java.net.InetSocketAddress; import java.net.SocketTimeoutException; +import java.net.URI; import java.util.ArrayList; import java.util.List; import java.util.concurrent.TimeUnit; @@ -542,9 +543,11 @@ public class TestDFSClientRetries extend conf, numDatanodes, true, null); try { cluster.waitActive(); + final DistributedFileSystem dfs = (DistributedFileSystem)cluster.getFileSystem(); + final URI uri = dfs.getUri(); + assertTrue(DistributedFileSystem.isHealthy(uri)); //create a file - final DistributedFileSystem dfs = (DistributedFileSystem)cluster.getFileSystem(); final long length = 1L << 20; final Path file1 = new Path(dir, "foo"); DFSTestUtil.createFile(dfs, file1, length, numDatanodes, 20120406L); @@ -554,7 +557,9 @@ public class TestDFSClientRetries extend assertEquals(length, s1.getLen()); //shutdown namenode + assertTrue(DistributedFileSystem.isHealthy(uri)); cluster.shutdownNameNode(); + assertFalse(DistributedFileSystem.isHealthy(uri)); //namenode is down, create another file in a thread final Path file3 = new Path(dir, "file"); @@ -579,8 +584,10 @@ public class TestDFSClientRetries extend try { //sleep, restart, and then wait active TimeUnit.SECONDS.sleep(30); + assertFalse(DistributedFileSystem.isHealthy(uri)); cluster.restartNameNode(false, false); cluster.waitActive(); + assertTrue(DistributedFileSystem.isHealthy(uri)); } catch (Exception e) { exceptions.add(e); } @@ -596,7 +603,9 @@ public class TestDFSClientRetries extend assertEquals(dfs.getFileChecksum(file1), dfs.getFileChecksum(file3)); //enter safe mode + assertTrue(DistributedFileSystem.isHealthy(uri)); dfs.setSafeMode(SafeModeAction.SAFEMODE_ENTER); + assertFalse(DistributedFileSystem.isHealthy(uri)); //leave safe mode in a new thread new Thread(new Runnable() { @@ -605,7 +614,9 @@ public class TestDFSClientRetries extend try { //sleep and then leave safe mode TimeUnit.SECONDS.sleep(30); + assertFalse(DistributedFileSystem.isHealthy(uri)); dfs.setSafeMode(SafeModeAction.SAFEMODE_LEAVE); + assertTrue(DistributedFileSystem.isHealthy(uri)); } catch (Exception e) { exceptions.add(e); } @@ -616,6 +627,8 @@ public class TestDFSClientRetries extend final Path file2 = new Path(dir, "bar"); DFSTestUtil.createFile(dfs, file2, length, numDatanodes, 20120406L); assertEquals(dfs.getFileChecksum(file1), dfs.getFileChecksum(file2)); + + assertTrue(DistributedFileSystem.isHealthy(uri)); //make sure it won't retry on exceptions like FileNotFoundException final Path nonExisting = new Path(dir, "nonExisting");