HDFS-10971. Distcp should not copy replication factor if source file is erasure coded. Contributed by Manoj Govindassamy.
Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/f1a63e73 Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/f1a63e73 Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/f1a63e73 Branch: refs/heads/HDFS-10467 Commit: f1a63e73b3a0f09c8158c000acd2815079932b99 Parents: 14434c7 Author: Andrew Wang <w...@apache.org> Authored: Tue Mar 28 22:14:03 2017 -0700 Committer: Inigo <inigo...@apache.org> Committed: Wed Mar 29 19:32:11 2017 -0700 ---------------------------------------------------------------------- .../hadoop/tools/CopyListingFileStatus.java | 4 + .../tools/mapred/RetriableFileCopyCommand.java | 3 + .../apache/hadoop/tools/util/DistCpUtils.java | 9 +- .../hadoop/tools/TestCopyListingFileStatus.java | 1 + .../hadoop/tools/util/TestDistCpUtils.java | 123 ++++++++++++++++++- 5 files changed, 137 insertions(+), 3 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hadoop/blob/f1a63e73/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/CopyListingFileStatus.java ---------------------------------------------------------------------- diff --git a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/CopyListingFileStatus.java b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/CopyListingFileStatus.java index 2b1e7e4..00d4b32 100644 --- a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/CopyListingFileStatus.java +++ b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/CopyListingFileStatus.java @@ -159,6 +159,10 @@ public final class CopyListingFileStatus implements Writable { return permission; } + public boolean isErasureCoded() { + return getPermission().getErasureCodedBit(); + } + /** * Returns the full logical ACL. * http://git-wip-us.apache.org/repos/asf/hadoop/blob/f1a63e73/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/RetriableFileCopyCommand.java ---------------------------------------------------------------------- diff --git a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/RetriableFileCopyCommand.java b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/RetriableFileCopyCommand.java index d1cdfdd..06acd78 100644 --- a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/RetriableFileCopyCommand.java +++ b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/RetriableFileCopyCommand.java @@ -167,6 +167,9 @@ public class RetriableFileCopyCommand extends RetriableCommand { FsPermission.getUMask(targetFS.getConf())); final OutputStream outStream; if (action == FileAction.OVERWRITE) { + // If there is an erasure coding policy set on the target directory, + // files will be written to the target directory using the same EC policy. + // The replication factor of the source file is ignored and not preserved. final short repl = getReplicationFactor(fileAttributes, source, targetFS, targetPath); final long blockSize = getBlockSize(fileAttributes, source, http://git-wip-us.apache.org/repos/asf/hadoop/blob/f1a63e73/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/util/DistCpUtils.java ---------------------------------------------------------------------- diff --git a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/util/DistCpUtils.java b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/util/DistCpUtils.java index c308e6f..76bc4c5 100644 --- a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/util/DistCpUtils.java +++ b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/util/DistCpUtils.java @@ -236,8 +236,13 @@ public class DistCpUtils { } } - if (attributes.contains(FileAttribute.REPLICATION) && !targetFileStatus.isDirectory() && - (srcFileStatus.getReplication() != targetFileStatus.getReplication())) { + // The replication factor can only be preserved for replicated files. + // It is ignored when either the source or target file are erasure coded. + if (attributes.contains(FileAttribute.REPLICATION) && + !targetFileStatus.isDirectory() && + !targetFileStatus.isErasureCoded() && + !srcFileStatus.isErasureCoded() && + srcFileStatus.getReplication() != targetFileStatus.getReplication()) { targetFS.setReplication(path, srcFileStatus.getReplication()); } http://git-wip-us.apache.org/repos/asf/hadoop/blob/f1a63e73/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/TestCopyListingFileStatus.java ---------------------------------------------------------------------- diff --git a/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/TestCopyListingFileStatus.java b/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/TestCopyListingFileStatus.java index f512ef6..8efc5cf 100644 --- a/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/TestCopyListingFileStatus.java +++ b/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/TestCopyListingFileStatus.java @@ -62,6 +62,7 @@ public class TestCopyListingFileStatus { assertEquals(stat.getOwner(), clfs.getOwner()); assertEquals(stat.getGroup(), clfs.getGroup()); assertEquals(stat.getPath(), clfs.getPath()); + assertEquals(stat.isErasureCoded(), clfs.isErasureCoded()); } } http://git-wip-us.apache.org/repos/asf/hadoop/blob/f1a63e73/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/util/TestDistCpUtils.java ---------------------------------------------------------------------- diff --git a/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/util/TestDistCpUtils.java b/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/util/TestDistCpUtils.java index 8c79bec..c42e546 100644 --- a/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/util/TestDistCpUtils.java +++ b/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/util/TestDistCpUtils.java @@ -18,6 +18,10 @@ package org.apache.hadoop.tools.util; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; +import static org.junit.Assert.assertFalse; + import java.io.IOException; import java.io.OutputStream; import java.util.EnumSet; @@ -31,11 +35,15 @@ import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.permission.FsPermission; +import org.apache.hadoop.hdfs.DFSConfigKeys; import org.apache.hadoop.hdfs.MiniDFSCluster; +import org.apache.hadoop.hdfs.server.namenode.INodeFile; +import org.apache.hadoop.hdfs.tools.ECAdmin; import org.apache.hadoop.io.IOUtils; import org.apache.hadoop.tools.CopyListingFileStatus; import org.apache.hadoop.tools.DistCpOptionSwitch; import org.apache.hadoop.tools.DistCpOptions.FileAttribute; +import org.apache.hadoop.util.ToolRunner; import org.junit.AfterClass; import org.junit.Assert; import org.junit.BeforeClass; @@ -52,8 +60,10 @@ public class TestDistCpUtils { @BeforeClass public static void create() throws IOException { + config.set(DFSConfigKeys.DFS_NAMENODE_EC_POLICIES_ENABLED_KEY, + "XOR-2-1-64k"); cluster = new MiniDFSCluster.Builder(config) - .numDataNodes(1) + .numDataNodes(2) .format(true) .build(); } @@ -539,6 +549,117 @@ public class TestDistCpUtils { Assert.assertTrue(srcStatus.getReplication() == dstStatus.getReplication()); } + @Test (timeout = 60000) + public void testReplFactorNotPreservedOnErasureCodedFile() throws Exception { + FileSystem fs = FileSystem.get(config); + + // Case 1: Verify replication attribute not preserved when the source + // file is erasure coded and the target file is replicated. + Path srcECDir = new Path("/tmp/srcECDir"); + Path srcECFile = new Path(srcECDir, "srcECFile"); + Path dstReplDir = new Path("/tmp/dstReplDir"); + Path dstReplFile = new Path(dstReplDir, "destReplFile"); + fs.mkdirs(srcECDir); + fs.mkdirs(dstReplDir); + String[] args = {"-setPolicy", "-path", "/tmp/srcECDir", + "-policy", "XOR-2-1-64k"}; + int res = ToolRunner.run(config, new ECAdmin(config), args); + assertEquals("Setting EC policy should succeed!", 0, res); + verifyReplFactorNotPreservedOnErasureCodedFile(srcECFile, true, + dstReplFile, false); + + // Case 2: Verify replication attribute not preserved when the source + // file is replicated and the target file is erasure coded. + Path srcReplDir = new Path("/tmp/srcReplDir"); + Path srcReplFile = new Path(srcReplDir, "srcReplFile"); + Path dstECDir = new Path("/tmp/dstECDir"); + Path dstECFile = new Path(dstECDir, "destECFile"); + fs.mkdirs(srcReplDir); + fs.mkdirs(dstECDir); + args = new String[]{"-setPolicy", "-path", "/tmp/dstECDir", + "-policy", "XOR-2-1-64k"}; + res = ToolRunner.run(config, new ECAdmin(config), args); + assertEquals("Setting EC policy should succeed!", 0, res); + verifyReplFactorNotPreservedOnErasureCodedFile(srcReplFile, + false, dstECFile, true); + + // Case 3: Verify replication attribute not altered from the default + // INodeFile.DEFAULT_REPL_FOR_STRIPED_BLOCKS when both source and + // target files are erasure coded. + verifyReplFactorNotPreservedOnErasureCodedFile(srcECFile, + true, dstECFile, true); + } + + private void verifyReplFactorNotPreservedOnErasureCodedFile(Path srcFile, + boolean isSrcEC, Path dstFile, boolean isDstEC) throws Exception { + FileSystem fs = FileSystem.get(config); + createFile(fs, srcFile); + CopyListingFileStatus srcStatus = new CopyListingFileStatus( + fs.getFileStatus(srcFile)); + if (isSrcEC) { + assertTrue(srcFile + "should be erasure coded!", + srcStatus.isErasureCoded()); + assertEquals(INodeFile.DEFAULT_REPL_FOR_STRIPED_BLOCKS, + srcStatus.getReplication()); + } else { + assertEquals("Unexpected replication factor for " + srcFile, + fs.getDefaultReplication(srcFile), srcStatus.getReplication()); + } + + createFile(fs, dstFile); + CopyListingFileStatus dstStatus = new CopyListingFileStatus( + fs.getFileStatus(dstFile)); + if (isDstEC) { + assertTrue(dstFile + "should be erasure coded!", + dstStatus.isErasureCoded()); + assertEquals("Unexpected replication factor for erasure coded file!", + INodeFile.DEFAULT_REPL_FOR_STRIPED_BLOCKS, + dstStatus.getReplication()); + } else { + assertEquals("Unexpected replication factor for " + dstFile, + fs.getDefaultReplication(dstFile), dstStatus.getReplication()); + } + + // Let srcFile and dstFile differ on their FileAttribute + fs.setPermission(srcFile, fullPerm); + fs.setOwner(srcFile, "ec", "ec-group"); + fs.setTimes(srcFile, 0, 0); + + fs.setPermission(dstFile, noPerm); + fs.setOwner(dstFile, "normal", "normal-group"); + fs.setTimes(dstFile, 100, 100); + + // Running preserve operations only for replication attribute + srcStatus = new CopyListingFileStatus(fs.getFileStatus(srcFile)); + EnumSet<FileAttribute> attributes = EnumSet.of(FileAttribute.REPLICATION); + DistCpUtils.preserve(fs, dstFile, srcStatus, attributes, false); + dstStatus = new CopyListingFileStatus(fs.getFileStatus(dstFile)); + + assertFalse("Permission for " + srcFile + " and " + dstFile + + " should not be same after preserve only for replication attr!", + srcStatus.getPermission().equals(dstStatus.getPermission())); + assertFalse("File ownership should not match!", + srcStatus.getOwner().equals(dstStatus.getOwner())); + assertFalse(srcStatus.getGroup().equals(dstStatus.getGroup())); + assertFalse(srcStatus.getAccessTime() == dstStatus.getAccessTime()); + assertFalse( + srcStatus.getModificationTime() == dstStatus.getModificationTime()); + if (isDstEC) { + assertEquals("Unexpected replication factor for erasure coded file!", + INodeFile.DEFAULT_REPL_FOR_STRIPED_BLOCKS, + dstStatus.getReplication()); + } else { + assertEquals(dstFile + " replication factor should be same as dst " + + "filesystem!", fs.getDefaultReplication(dstFile), + dstStatus.getReplication()); + } + if (!isSrcEC || !isDstEC) { + assertFalse(dstFile + " replication factor should not be " + + "same as " + srcFile, + srcStatus.getReplication() == dstStatus.getReplication()); + } + } + @Test public void testPreserveTimestampOnFile() throws IOException { FileSystem fs = FileSystem.get(config); --------------------------------------------------------------------- To unsubscribe, e-mail: common-commits-unsubscr...@hadoop.apache.org For additional commands, e-mail: common-commits-h...@hadoop.apache.org