BIGTOP-2234. TestBlockRecovery incorrectly fails on a single node cluster
Project: http://git-wip-us.apache.org/repos/asf/bigtop/repo
Commit: http://git-wip-us.apache.org/repos/asf/bigtop/commit/937a985e
Tree: http://git-wip-us.apache.org/repos/asf/bigtop/tree/937a985e
Diff: http://git-wip-us.apache.org/repos/asf/bigtop/diff/937a985e

Branch: refs/heads/master
Commit: 937a985e4f36758bbe22a1bf25fcb07463f7eef0
Parents: 98d3349
Author: Konstantin Boudnik <[email protected]>
Authored: Tue Jan 12 23:06:37 2016 -0800
Committer: Konstantin Boudnik <[email protected]>
Committed: Thu Jan 14 13:08:17 2016 -0800

----------------------------------------------------------------------
 .../itest/hadoop/hdfs/TestBlockRecovery.groovy | 76 ++++++++++++--------
 1 file changed, 45 insertions(+), 31 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/bigtop/blob/937a985e/bigtop-tests/test-artifacts/hadoop/src/main/groovy/org/apache/bigtop/itest/hadoop/hdfs/TestBlockRecovery.groovy
----------------------------------------------------------------------
diff --git a/bigtop-tests/test-artifacts/hadoop/src/main/groovy/org/apache/bigtop/itest/hadoop/hdfs/TestBlockRecovery.groovy b/bigtop-tests/test-artifacts/hadoop/src/main/groovy/org/apache/bigtop/itest/hadoop/hdfs/TestBlockRecovery.groovy
index 2a3f2d9..88f20f2 100644
--- a/bigtop-tests/test-artifacts/hadoop/src/main/groovy/org/apache/bigtop/itest/hadoop/hdfs/TestBlockRecovery.groovy
+++ b/bigtop-tests/test-artifacts/hadoop/src/main/groovy/org/apache/bigtop/itest/hadoop/hdfs/TestBlockRecovery.groovy
@@ -39,6 +39,9 @@ import org.apache.hadoop.hdfs.DFSTestUtil;
    three datanodes to allow block recovery. Block replication must be
    set to a minimum value of 2 for this test to work properly.
+
+   If passwordless setup isn't configured for user HDFS the test
+   will be skipped as well.
  */
 public class TestBlockRecovery {
@@ -46,6 +49,7 @@ public class TestBlockRecovery {
   private static Configuration conf;
+  private static final String SSH_HDFS_ID = "~/.ssh/id_hdfsuser"
   private static final String corruptContent = "0123456789";
   private static final String HDFS_TEST_DIR = "/tmp/TestBlockRecovery$corruptContent";
   private static final String fsFilePath = HDFS_TEST_DIR + "/file0";
@@ -64,18 +68,36 @@ public class TestBlockRecovery {
   private static String initialBlockChecksum;
   private static String fileContent;
-  private static def dataDirs = [];
+  private static String [] dataDirs;
   private static def nodesBeforeRecovery = [];
   private static def nodesAfterRecovery = [];
-  private static short numberOfDataNodes;
-  private static short repFactor;
-
-  private static final long fileLen = 10;
-  private static final long SEED = 0;
-
   @BeforeClass
   public static void setUp() {
+    shHDFS.exec("rm -rf $localTestDir");
+    shHDFS.exec("mkdir $localTestDir");
+    shHDFS.exec("hadoop fs -rm -r $fsFilePath");
+    Thread.sleep(TIMEOUT);
+    shHDFS.exec("hadoop fs -mkdir -p $HDFS_TEST_DIR && hadoop fs -chmod 777 $HDFS_TEST_DIR");
+    assertTrue("Failed to create input directory", shHDFS.getRet() == 0);
+
+  }
+
+  @AfterClass
+  public static void tearDown() {
+    // deletion of test files
+    shHDFS.exec("rm -rf $localTestDir");
+    assertTrue("Could not delete test directory $localTestDir", shHDFS.getRet() == 0);
+  }
+
+  @Test
+  public void testBlockRecovery() {
+
+    short numberOfDataNodes;
+    short repFactor;
+
+    final long fileLen = 10;
+    final long SEED = 0;
     /* Find datanode data directory, make file, add content, ensure replication
      * is set to guarantee any chosen datanode will have block,
      * get block and its location, perform checksum before corrupting block
@@ -84,20 +106,21 @@ public class TestBlockRecovery {
     conf = new HdfsConfiguration();
     FileSystem fileSys = DistributedFileSystem.get(conf);
     conf.addResource("hdfs-site.xml");
-    dataDirs = conf.get("dfs.data.dir").split(",");
-    if (dataDirs == null)
-      dataDirs = conf.get("dfs.datanode.data.dir").split(",");
+    def confDataDirs = conf.get("dfs.data.dir")
+    if (confDataDirs == null)
+      confDataDirs = conf.get("dfs.datanode.data.dir")
+    // data.dirs might be configured w/ explicit file:// auth.
+    // Should be stripped-off
+    dataDirs = confDataDirs.split(",")*.replaceFirst(~/file:\/\//,'');
     numberOfDataNodes = shHDFS.exec("hdfs dfsadmin -report | grep ^Name | wc -l").getOut()[0] as short;
+    // to recover a block at least two non-corrupted replicas should exist
     Assume.assumeTrue(numberOfDataNodes >= 3);
+    // If passwordless setup isn't configured for user HDFS the test needs to
+    // be skipped
+    Assume.assumeTrue(shHDFS.exec("[ -f ${SSH_HDFS_ID} ]").getRet() == 0);
-    shHDFS.exec("rm -rf $localTestDir");
-    shHDFS.exec("mkdir $localTestDir");
-    shHDFS.exec("hadoop fs -rm -r $fsFilePath");
-    Thread.sleep(TIMEOUT);
-    shHDFS.exec("hadoop fs -mkdir -p $HDFS_TEST_DIR && hadoop fs -chmod 777 $HDFS_TEST_DIR");
-    assertTrue("Failed to create input directory", shHDFS.getRet() == 0);
-
+    // snapshot of everything before the corruption happens
     repFactor = (numberOfDataNodes - 1);
     try {
       DFSTestUtil.createFile(fileSys, new Path(fsFilePath), fileLen, repFactor, SEED);
@@ -105,6 +128,7 @@ public class TestBlockRecovery {
       assert "Exception should not be thrown"
     }
     fileContent = shHDFS.exec("hadoop fs -cat $fsFilePath").getOut()[0];
+    assertTrue("File $fsFilePath doesn't exist", shHDFS.getRet() == 0);
 
     shHDFS.exec("hdfs fsck $fsFilePath -blocks -locations -files > $outputFile");
     assertTrue("Could not write output to file", shHDFS.getRet() == 0);
@@ -125,19 +149,6 @@ public class TestBlockRecovery {
     initialBlockChecksum = shHDFS.exec("cksum $blockLocation").getOut()[0].split(" ")[0];
     assertTrue("Could not obtain checksum for block $blockToTest", shHDFS.getRet() == 0);
 
-  }
-
-  @AfterClass
-  public static void tearDown() {
-    // deletion of test files
-    shHDFS.exec("hadoop fs -rm -r -skipTrash $fsFilePath");
-    assertTrue("Could not delete file $fsFilePath", shHDFS.getRet() == 0);
-    shHDFS.exec("rm -rf $localTestDir");
-    assertTrue("Could not delete test directory $localTestDir", shHDFS.getRet() == 0);
-  }
-
-  @Test
-  public void testBlockRecovery() {
     // corrupt block
     shHDFS.exec("echo $corruptContent > $blockLocation");
     assertTrue("Could not write to file", shHDFS.getRet() == 0);
@@ -182,7 +193,7 @@ public class TestBlockRecovery {
     while (cksumAttempt < 3) {
       if (corruptBlockChecksum != initialBlockChecksum) {
         sleep(sleepTime);
-        corruptBlockChecksum = shHDFS.exec("ssh -o StrictHostKeyChecking=no -i ~/.ssh/id_hdfsuser " +
+        corruptBlockChecksum = shHDFS.exec("ssh -o StrictHostKeyChecking=no -i ${SSH_HDFS_ID} " +
           "$blockRecoveryNode 'cksum `find ${dataDirs.join(' ')}" +
           " -name $blockToTest 2>/dev/null | grep $blockToTest` '").getOut()[0].split(" ")[0];
         ++cksumAttempt;
@@ -196,6 +207,9 @@ public class TestBlockRecovery {
       }
     }
     assertTrue("Block has not recovered", success);
+    // Let's remove the garbage after the test
+    shHDFS.exec("hadoop fs -rm -r -skipTrash $fsFilePath");
+    assertTrue("Could not delete file $fsFilePath", shHDFS.getRet() == 0);
   }
 }
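
One idiom in the patch worth spelling out is the Groovy spread operator used to
normalize the configured data directories: entries in dfs.data.dir /
dfs.datanode.data.dir may carry an explicit file:// scheme, and
split(",")*.replaceFirst(~/file:\/\//, '') applies the replacement to every
element of the split result. A minimal standalone sketch of that idiom, using
made-up directory values rather than anything from a real hdfs-site.xml:

    // Hypothetical config value; the test reads the real one from hdfs-site.xml.
    def confDataDirs = 'file:///data/1/dfs/dn,file:///data/2/dfs/dn,/data/3/dfs/dn'

    // The spread operator (*.) invokes replaceFirst on each element produced
    // by split(","), stripping an optional leading file:// scheme.
    def dataDirs = confDataDirs.split(',')*.replaceFirst(~/file:\/\//, '')

    assert dataDirs == ['/data/1/dfs/dn', '/data/2/dfs/dn', '/data/3/dfs/dn']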

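For readers unfamiliar with the skip mechanism the fix relies on: with JUnit 4,
org.junit.Assume turns a false condition into a skipped test rather than a
failure, which is what lets the patched test bow out gracefully on a
single-node cluster or on a host without the hdfs user's ssh identity. A
minimal sketch of the pattern follows; the class and variable names here are
illustrative only, not part of the Bigtop patch:

    import org.junit.Assume
    import org.junit.Test

    // Illustrative test class, not from the patch.
    class AssumeSkipSketch {
      @Test
      void runsOnlyOnMultiNodeClusters() {
        int numberOfDataNodes = 1  // pretend we probed a single-node cluster

        // A failed assumption marks the test "skipped" instead of "failed";
        // execution stops here when fewer than three datanodes are present.
        Assume.assumeTrue(numberOfDataNodes >= 3)

        // Block-corruption and recovery logic would only run past this point.
      }
    }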