HDFS-10971. Distcp should not copy replication factor if source file is erasure 
coded. Contributed by Manoj Govindassamy.


Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/f1a63e73
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/f1a63e73
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/f1a63e73

Branch: refs/heads/HDFS-10467
Commit: f1a63e73b3a0f09c8158c000acd2815079932b99
Parents: 14434c7
Author: Andrew Wang <w...@apache.org>
Authored: Tue Mar 28 22:14:03 2017 -0700
Committer: Inigo <inigo...@apache.org>
Committed: Wed Mar 29 19:32:11 2017 -0700

----------------------------------------------------------------------
 .../hadoop/tools/CopyListingFileStatus.java     |   4 +
 .../tools/mapred/RetriableFileCopyCommand.java  |   3 +
 .../apache/hadoop/tools/util/DistCpUtils.java   |   9 +-
 .../hadoop/tools/TestCopyListingFileStatus.java |   1 +
 .../hadoop/tools/util/TestDistCpUtils.java      | 123 ++++++++++++++++++-
 5 files changed, 137 insertions(+), 3 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hadoop/blob/f1a63e73/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/CopyListingFileStatus.java
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/CopyListingFileStatus.java b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/CopyListingFileStatus.java
index 2b1e7e4..00d4b32 100644
--- a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/CopyListingFileStatus.java
+++ b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/CopyListingFileStatus.java
@@ -159,6 +159,10 @@ public final class CopyListingFileStatus implements Writable {
     return permission;
   }
 
+  public boolean isErasureCoded() {
+    return getPermission().getErasureCodedBit();
+  }
+
   /**
    * Returns the full logical ACL.
    *

http://git-wip-us.apache.org/repos/asf/hadoop/blob/f1a63e73/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/RetriableFileCopyCommand.java
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/RetriableFileCopyCommand.java b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/RetriableFileCopyCommand.java
index d1cdfdd..06acd78 100644
--- a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/RetriableFileCopyCommand.java
+++ b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/RetriableFileCopyCommand.java
@@ -167,6 +167,9 @@ public class RetriableFileCopyCommand extends RetriableCommand {
         FsPermission.getUMask(targetFS.getConf()));
     final OutputStream outStream;
     if (action == FileAction.OVERWRITE) {
+      // If there is an erasure coding policy set on the target directory,
+      // files will be written to the target directory using the same EC policy.
+      // The replication factor of the source file is ignored and not preserved.
       final short repl = getReplicationFactor(fileAttributes, source,
           targetFS, targetPath);
       final long blockSize = getBlockSize(fileAttributes, source,

http://git-wip-us.apache.org/repos/asf/hadoop/blob/f1a63e73/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/util/DistCpUtils.java
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/util/DistCpUtils.java b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/util/DistCpUtils.java
index c308e6f..76bc4c5 100644
--- a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/util/DistCpUtils.java
+++ b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/util/DistCpUtils.java
@@ -236,8 +236,13 @@ public class DistCpUtils {
       }
     }
 
-    if (attributes.contains(FileAttribute.REPLICATION) && !targetFileStatus.isDirectory() &&
-        (srcFileStatus.getReplication() != targetFileStatus.getReplication())) {
+    // The replication factor can only be preserved for replicated files.
+    // It is ignored when either the source or target file are erasure coded.
+    if (attributes.contains(FileAttribute.REPLICATION) &&
+        !targetFileStatus.isDirectory() &&
+        !targetFileStatus.isErasureCoded() &&
+        !srcFileStatus.isErasureCoded() &&
+        srcFileStatus.getReplication() != targetFileStatus.getReplication()) {
       targetFS.setReplication(path, srcFileStatus.getReplication());
     }
 

http://git-wip-us.apache.org/repos/asf/hadoop/blob/f1a63e73/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/TestCopyListingFileStatus.java
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/TestCopyListingFileStatus.java b/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/TestCopyListingFileStatus.java
index f512ef6..8efc5cf 100644
--- a/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/TestCopyListingFileStatus.java
+++ b/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/TestCopyListingFileStatus.java
@@ -62,6 +62,7 @@ public class TestCopyListingFileStatus {
     assertEquals(stat.getOwner(), clfs.getOwner());
     assertEquals(stat.getGroup(), clfs.getGroup());
     assertEquals(stat.getPath(), clfs.getPath());
+    assertEquals(stat.isErasureCoded(), clfs.isErasureCoded());
   }
 
 }

http://git-wip-us.apache.org/repos/asf/hadoop/blob/f1a63e73/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/util/TestDistCpUtils.java
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/util/TestDistCpUtils.java b/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/util/TestDistCpUtils.java
index 8c79bec..c42e546 100644
--- a/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/util/TestDistCpUtils.java
+++ b/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/util/TestDistCpUtils.java
@@ -18,6 +18,10 @@
 
 package org.apache.hadoop.tools.util;
 
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.assertFalse;
+
 import java.io.IOException;
 import java.io.OutputStream;
 import java.util.EnumSet;
@@ -31,11 +35,15 @@ import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.fs.permission.FsPermission;
+import org.apache.hadoop.hdfs.DFSConfigKeys;
 import org.apache.hadoop.hdfs.MiniDFSCluster;
+import org.apache.hadoop.hdfs.server.namenode.INodeFile;
+import org.apache.hadoop.hdfs.tools.ECAdmin;
 import org.apache.hadoop.io.IOUtils;
 import org.apache.hadoop.tools.CopyListingFileStatus;
 import org.apache.hadoop.tools.DistCpOptionSwitch;
 import org.apache.hadoop.tools.DistCpOptions.FileAttribute;
+import org.apache.hadoop.util.ToolRunner;
 import org.junit.AfterClass;
 import org.junit.Assert;
 import org.junit.BeforeClass;
@@ -52,8 +60,10 @@ public class TestDistCpUtils {
   
   @BeforeClass
   public static void create() throws IOException {
+    config.set(DFSConfigKeys.DFS_NAMENODE_EC_POLICIES_ENABLED_KEY,
+        "XOR-2-1-64k");
     cluster = new MiniDFSCluster.Builder(config)
-        .numDataNodes(1)
+        .numDataNodes(2)
         .format(true)
         .build(); 
   }
@@ -539,6 +549,117 @@ public class TestDistCpUtils {
     Assert.assertTrue(srcStatus.getReplication() == dstStatus.getReplication());
   }
 
+  @Test (timeout = 60000)
+  public void testReplFactorNotPreservedOnErasureCodedFile() throws Exception {
+    FileSystem fs = FileSystem.get(config);
+
+    // Case 1: Verify replication attribute not preserved when the source
+    // file is erasure coded and the target file is replicated.
+    Path srcECDir = new Path("/tmp/srcECDir");
+    Path srcECFile = new Path(srcECDir, "srcECFile");
+    Path dstReplDir = new Path("/tmp/dstReplDir");
+    Path dstReplFile = new Path(dstReplDir, "destReplFile");
+    fs.mkdirs(srcECDir);
+    fs.mkdirs(dstReplDir);
+    String[] args = {"-setPolicy", "-path", "/tmp/srcECDir",
+        "-policy", "XOR-2-1-64k"};
+    int res = ToolRunner.run(config, new ECAdmin(config), args);
+    assertEquals("Setting EC policy should succeed!", 0, res);
+    verifyReplFactorNotPreservedOnErasureCodedFile(srcECFile, true,
+        dstReplFile, false);
+
+    // Case 2: Verify replication attribute not preserved when the source
+    // file is replicated and the target file is erasure coded.
+    Path srcReplDir = new Path("/tmp/srcReplDir");
+    Path srcReplFile = new Path(srcReplDir, "srcReplFile");
+    Path dstECDir = new Path("/tmp/dstECDir");
+    Path dstECFile = new Path(dstECDir, "destECFile");
+    fs.mkdirs(srcReplDir);
+    fs.mkdirs(dstECDir);
+    args = new String[]{"-setPolicy", "-path", "/tmp/dstECDir",
+        "-policy", "XOR-2-1-64k"};
+    res = ToolRunner.run(config, new ECAdmin(config), args);
+    assertEquals("Setting EC policy should succeed!", 0, res);
+    verifyReplFactorNotPreservedOnErasureCodedFile(srcReplFile,
+        false, dstECFile, true);
+
+    // Case 3: Verify replication attribute not altered from the default
+    // INodeFile.DEFAULT_REPL_FOR_STRIPED_BLOCKS when both source and
+    // target files are erasure coded.
+    verifyReplFactorNotPreservedOnErasureCodedFile(srcECFile,
+        true, dstECFile, true);
+  }
+
+  private void verifyReplFactorNotPreservedOnErasureCodedFile(Path srcFile,
+      boolean isSrcEC, Path dstFile, boolean isDstEC) throws Exception {
+    FileSystem fs = FileSystem.get(config);
+    createFile(fs, srcFile);
+    CopyListingFileStatus srcStatus = new CopyListingFileStatus(
+        fs.getFileStatus(srcFile));
+    if (isSrcEC) {
+      assertTrue(srcFile + "should be erasure coded!",
+          srcStatus.isErasureCoded());
+      assertEquals(INodeFile.DEFAULT_REPL_FOR_STRIPED_BLOCKS,
+          srcStatus.getReplication());
+    } else {
+      assertEquals("Unexpected replication factor for " + srcFile,
+          fs.getDefaultReplication(srcFile), srcStatus.getReplication());
+    }
+
+    createFile(fs, dstFile);
+    CopyListingFileStatus dstStatus = new CopyListingFileStatus(
+        fs.getFileStatus(dstFile));
+    if (isDstEC) {
+      assertTrue(dstFile + "should be erasure coded!",
+          dstStatus.isErasureCoded());
+      assertEquals("Unexpected replication factor for erasure coded file!",
+          INodeFile.DEFAULT_REPL_FOR_STRIPED_BLOCKS,
+          dstStatus.getReplication());
+    } else {
+      assertEquals("Unexpected replication factor for " + dstFile,
+          fs.getDefaultReplication(dstFile), dstStatus.getReplication());
+    }
+
+    // Let srcFile and dstFile differ on their FileAttribute
+    fs.setPermission(srcFile, fullPerm);
+    fs.setOwner(srcFile, "ec", "ec-group");
+    fs.setTimes(srcFile, 0, 0);
+
+    fs.setPermission(dstFile, noPerm);
+    fs.setOwner(dstFile, "normal", "normal-group");
+    fs.setTimes(dstFile, 100, 100);
+
+    // Running preserve operations only for replication attribute
+    srcStatus = new CopyListingFileStatus(fs.getFileStatus(srcFile));
+    EnumSet<FileAttribute> attributes = EnumSet.of(FileAttribute.REPLICATION);
+    DistCpUtils.preserve(fs, dstFile, srcStatus, attributes, false);
+    dstStatus = new CopyListingFileStatus(fs.getFileStatus(dstFile));
+
+    assertFalse("Permission for " + srcFile + " and " + dstFile +
+            " should not be same after preserve only for replication attr!",
+        srcStatus.getPermission().equals(dstStatus.getPermission()));
+    assertFalse("File ownership should not match!",
+        srcStatus.getOwner().equals(dstStatus.getOwner()));
+    assertFalse(srcStatus.getGroup().equals(dstStatus.getGroup()));
+    assertFalse(srcStatus.getAccessTime() == dstStatus.getAccessTime());
+    assertFalse(
+        srcStatus.getModificationTime() == dstStatus.getModificationTime());
+    if (isDstEC) {
+      assertEquals("Unexpected replication factor for erasure coded file!",
+          INodeFile.DEFAULT_REPL_FOR_STRIPED_BLOCKS,
+          dstStatus.getReplication());
+    } else {
+      assertEquals(dstFile + " replication factor should be same as dst " +
+              "filesystem!", fs.getDefaultReplication(dstFile),
+          dstStatus.getReplication());
+    }
+    if (!isSrcEC || !isDstEC) {
+      assertFalse(dstFile + " replication factor should not be " +
+              "same as " + srcFile,
+          srcStatus.getReplication() == dstStatus.getReplication());
+    }
+  }
+
   @Test
   public void testPreserveTimestampOnFile() throws IOException {
     FileSystem fs = FileSystem.get(config);


---------------------------------------------------------------------
To unsubscribe, e-mail: common-commits-unsubscr...@hadoop.apache.org
For additional commands, e-mail: common-commits-h...@hadoop.apache.org

Reply via email to