[ 
https://issues.apache.org/jira/browse/HDFS-17381?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17880364#comment-17880364
 ] 

ASF GitHub Bot commented on HDFS-17381:
---------------------------------------

steveloughran commented on code in PR #6551:
URL: https://github.com/apache/hadoop/pull/6551#discussion_r1750385001


##########
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/WithErasureCoding.java:
##########
@@ -0,0 +1,50 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.fs;
+
+import java.io.IOException;
+
+/**
+ * Filesystems that support EC can implement this interface.
+ */
+public interface WithErasureCoding {
+
+  /**
+   * Get the EC Policy name of the given file's fileStatus.
+   * If the file is not erasure coded, this should return null.

Review Comment:
   Use RFC2119 terminology. Here: SHALL



##########
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/Options.java:
##########
@@ -641,5 +641,10 @@ private OpenFileOptions() {
                 FS_OPTION_OPENFILE_READ_POLICY_WHOLE_FILE)
             .collect(Collectors.toSet()));
 
+    /**
+     * EC policy to be set on the file that needs to be created.

Review Comment:
   add an {@value} tag for IDE/javadoc preview of value



##########
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/WithErasureCoding.java:
##########
@@ -0,0 +1,50 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.fs;
+
+import java.io.IOException;
+
+/**
+ * Filesystems that support EC can implement this interface.
+ */
+public interface WithErasureCoding {
+
+  /**
+   * Get the EC Policy name of the given file's fileStatus.
+   * If the file is not erasure coded, this should return null.
+   * Callers will make sure to check if the FS schema of the fileStatus

Review Comment:
   hard to parse this. What are callers meant to do? Filesystem scheme (hdfs, 
o3, ...) can be changed. Do you mean use `instanceof` to probe a filesystem 
instance to see if it implements it?



##########
hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/RetriableDirectoryCreateCommand.java:
##########
@@ -18,15 +18,20 @@
 
 package org.apache.hadoop.tools.mapred;
 
+import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.FileStatus;
-import org.apache.hadoop.hdfs.DistributedFileSystem;
+import org.apache.hadoop.fs.Options;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.WithErasureCoding;
 import org.apache.hadoop.hdfs.protocol.ErasureCodingPolicy;
-import org.apache.hadoop.hdfs.protocol.HdfsFileStatus;
+import org.apache.hadoop.hdfs.protocol.SystemErasureCodingPolicies;
 import org.apache.hadoop.tools.DistCpOptions;
 import org.apache.hadoop.tools.util.RetriableCommand;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.mapreduce.Mapper;
+import org.slf4j.Logger;

Review Comment:
   put in a separate import block



##########
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileStatus.java:
##########
@@ -351,7 +352,12 @@ public String getGroup() {
   public Path getPath() {
     return path;
   }
-  
+
+  @VisibleForTesting

Review Comment:
   no. commented on the test.



##########
hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/TestDistCpWithRawXAttrs.java:
##########
@@ -240,6 +255,107 @@ public void testPreserveAndNoPreserveEC() throws 
Exception {
     dfs.unsetErasureCodingPolicy(dir1);
   }
 
+
+  @Test
+  public void testPreserveECAcrossFilesystems() throws  Exception{
+    // set EC policy on source (HDFS)
+    String[] args = {"-setPolicy", "-path", dir1.toString(),
+        "-policy", "XOR-2-1-1024k"};
+    fs.delete(new Path("/dest"), true);
+    fs.mkdirs(subDir1);
+    DistributedFileSystem dfs = (DistributedFileSystem) fs;
+    dfs.enableErasureCodingPolicy("XOR-2-1-1024k");
+    dfs.setErasureCodingPolicy(dir1, "XOR-2-1-1024k");
+    fs.create(file1).close();
+    int res = ToolRunner.run(conf, new ECAdmin(conf), args);
+    assertEquals("Unable to set EC policy on " + subDir1.toString(), res, 0);
+    String src = "/src/*";
+    Path dest = new Path(TEST_ROOT_DIR, "dest");
+    final Path dest2Dir1 = new Path(dest, "dir1");
+    final Path dest2SubDir1 = new Path(dest2Dir1, "subdir1");
+
+    // copy source(HDFS) to target(DummyECFS) with preserveEC
+
+    try (DummyEcFs dummyEcFs = 
(DummyEcFs)FileSystem.get(URI.create("file:///"), conf)) {
+      Path target = dummyEcFs.makeQualified(dest);
+      DistCpTestUtils.assertRunDistCp(DistCpConstants.SUCCESS, src, 
target.toString(),
+          "-pe", conf);
+      try {
+        FileStatus destDir1Status = dummyEcFs.getFileStatus(dest2Dir1);
+        FileStatus destSubDir1Status = dummyEcFs.getFileStatus(dest2SubDir1);
+        assertNotNull(destDir1Status);
+        assertNotNull(destSubDir1Status);
+        // check if target paths are erasure coded.
+        assertTrue(dummyEcFs.isPathErasureCoded(destDir1Status.getPath()));
+        assertTrue(dummyEcFs.isPathErasureCoded(destSubDir1Status.getPath()));
+
+        // copy source(DummyECFS) to target (HDFS)
+        String dfsTarget = "/dest";
+        DistCpTestUtils.assertRunDistCp(DistCpConstants.SUCCESS,
+            target.toString(), dfsTarget, "-pe", conf);
+        Path dfsTargetPath = new Path(dfsTarget);
+        Path dfsTargetDir1 = new Path(dfsTarget, "dir1");
+        assertTrue(fs.exists(dfsTargetPath));
+        assertTrue(fs.exists(dfsTargetDir1));
+        assertTrue(fs.getFileStatus(dfsTargetDir1).isErasureCoded());
+        fs.delete(dfsTargetPath, true);
+      } finally {
+        dummyEcFs.delete(new Path(base.getAbsolutePath()),true);
+      }
+    }
+
+  }
+
+  /**
+   * Dummy/Fake FS implementation that supports Erasure Coding.
+   */
+  public static class DummyEcFs extends LocalFileSystem implements 
WithErasureCoding {
+
+    private  Set<Path> erasureCodedPaths;
+    public DummyEcFs() {
+      super();
+      this.erasureCodedPaths = new HashSet<>();
+    }
+
+    public boolean isPathErasureCoded(Path p){
+      return erasureCodedPaths.contains(p);
+    }
+
+
+    @Override
+    public boolean hasPathCapability(Path path, String capability)
+        throws IOException {
+      switch (validatePathCapabilityArgs(makeQualified(path), capability)) {
+      case Options.OpenFileOptions.FS_OPTION_OPENFILE_EC_POLICY:
+        return true;
+      default:
+        return super.hasPathCapability(path, capability);
+      }
+    }
+
+    @Override
+    public FileStatus getFileStatus(Path f) throws IOException {
+      FileStatus fileStatus = super.getFileStatus(f);
+      if (erasureCodedPaths.contains(f)) {
+        Set<FileStatus.AttrFlags> attrSet = new HashSet<>();

Review Comment:
   create a new FileStatus instance passing down all other values but setting 
the attributes to include HAS_EC



##########
hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/TestDistCpWithRawXAttrs.java:
##########
@@ -240,6 +255,107 @@ public void testPreserveAndNoPreserveEC() throws 
Exception {
     dfs.unsetErasureCodingPolicy(dir1);
   }
 
+
+  @Test
+  public void testPreserveECAcrossFilesystems() throws  Exception{
+    // set EC policy on source (HDFS)
+    String[] args = {"-setPolicy", "-path", dir1.toString(),
+        "-policy", "XOR-2-1-1024k"};
+    fs.delete(new Path("/dest"), true);
+    fs.mkdirs(subDir1);
+    DistributedFileSystem dfs = (DistributedFileSystem) fs;
+    dfs.enableErasureCodingPolicy("XOR-2-1-1024k");
+    dfs.setErasureCodingPolicy(dir1, "XOR-2-1-1024k");
+    fs.create(file1).close();
+    int res = ToolRunner.run(conf, new ECAdmin(conf), args);
+    assertEquals("Unable to set EC policy on " + subDir1.toString(), res, 0);

Review Comment:
   look at the javadocs for assertEquals to see what's wrong with this line



##########
hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DistributedFileSystem.java:
##########
@@ -376,6 +378,11 @@ public FSDataInputStream open(PathHandle fd, int 
bufferSize)
     return dfs.createWrappedInputStream(dfsis);
   }
 
+  @Override
+  public String getErasureCodingPolicyName(FileStatus fileStatus) {
+    return ((HdfsFileStatus) fileStatus).getErasureCodingPolicy().getName();

Review Comment:
   this raises ClassCastException if a different status class is passed in.
   
   This is NOT consistent with the javadocs "If the call fails due to some 
error, return null."



##########
hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/RetriableDirectoryCreateCommand.java:
##########
@@ -66,12 +75,32 @@ protected Object doExecute(Object... arguments) throws 
Exception {
     boolean preserveEC = getFileAttributeSettings(context)
         .contains(DistCpOptions.FileAttribute.ERASURECODINGPOLICY);
     if (preserveEC && sourceStatus.isErasureCoded()
-        && targetFS instanceof DistributedFileSystem) {
-      ErasureCodingPolicy ecPolicy =
-          ((HdfsFileStatus) sourceStatus).getErasureCodingPolicy();
-      DistributedFileSystem dfs = (DistributedFileSystem) targetFS;
-      dfs.setErasureCodingPolicy(target, ecPolicy.getName());
+        && checkFSSupportsEC(sourceStatus.getPath(), sourceFs)
+        && checkFSSupportsEC(target, targetFS)) {
+      ErasureCodingPolicy ecPolicy = SystemErasureCodingPolicies.getByName(
+          ((WithErasureCoding) sourceFs).getErasureCodingPolicyName(
+              sourceStatus));
+      if (LOG.isDebugEnabled()) {

Review Comment:
   just use LOG.debug(); no need to wrap the SLF4J API



##########
hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/RetriableDirectoryCreateCommand.java:
##########
@@ -66,12 +75,32 @@ protected Object doExecute(Object... arguments) throws 
Exception {
     boolean preserveEC = getFileAttributeSettings(context)
         .contains(DistCpOptions.FileAttribute.ERASURECODINGPOLICY);
     if (preserveEC && sourceStatus.isErasureCoded()
-        && targetFS instanceof DistributedFileSystem) {
-      ErasureCodingPolicy ecPolicy =
-          ((HdfsFileStatus) sourceStatus).getErasureCodingPolicy();
-      DistributedFileSystem dfs = (DistributedFileSystem) targetFS;
-      dfs.setErasureCodingPolicy(target, ecPolicy.getName());
+        && checkFSSupportsEC(sourceStatus.getPath(), sourceFs)
+        && checkFSSupportsEC(target, targetFS)) {
+      ErasureCodingPolicy ecPolicy = SystemErasureCodingPolicies.getByName(
+          ((WithErasureCoding) sourceFs).getErasureCodingPolicyName(
+              sourceStatus));
+      if (LOG.isDebugEnabled()) {
+        LOG.debug("EC Policy for source path is {}", ecPolicy);
+      }
+      WithErasureCoding ecFs =  (WithErasureCoding) targetFS;
+      if (ecPolicy != null) {
+        ecFs.setErasureCodingPolicy(target, ecPolicy.getName());
+      }
     }
     return true;
   }
+
+  /**
+   * Return true if the FS implements {@link WithErasureCoding} and
+   * supports EC_POLICY option in {@link Options.OpenFileOptions}
+   */
+  boolean checkFSSupportsEC(Path path, FileSystem fs) throws IOException {
+    if (fs instanceof WithErasureCoding && fs.hasPathCapability(path,

Review Comment:
   nit: put the && onto a new line so the hasPathCapability probe isn't split



##########
hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/RetriableDirectoryCreateCommand.java:
##########
@@ -66,12 +75,32 @@ protected Object doExecute(Object... arguments) throws 
Exception {
     boolean preserveEC = getFileAttributeSettings(context)
         .contains(DistCpOptions.FileAttribute.ERASURECODINGPOLICY);
     if (preserveEC && sourceStatus.isErasureCoded()
-        && targetFS instanceof DistributedFileSystem) {
-      ErasureCodingPolicy ecPolicy =
-          ((HdfsFileStatus) sourceStatus).getErasureCodingPolicy();
-      DistributedFileSystem dfs = (DistributedFileSystem) targetFS;
-      dfs.setErasureCodingPolicy(target, ecPolicy.getName());
+        && checkFSSupportsEC(sourceStatus.getPath(), sourceFs)
+        && checkFSSupportsEC(target, targetFS)) {
+      ErasureCodingPolicy ecPolicy = SystemErasureCodingPolicies.getByName(
+          ((WithErasureCoding) sourceFs).getErasureCodingPolicyName(
+              sourceStatus));
+      if (LOG.isDebugEnabled()) {
+        LOG.debug("EC Policy for source path is {}", ecPolicy);
+      }
+      WithErasureCoding ecFs =  (WithErasureCoding) targetFS;
+      if (ecPolicy != null) {
+        ecFs.setErasureCodingPolicy(target, ecPolicy.getName());
+      }
     }
     return true;
   }
+
+  /**
+   * Return true if the FS implements {@link WithErasureCoding} and
+   * supports EC_POLICY option in {@link Options.OpenFileOptions}
+   */
+  boolean checkFSSupportsEC(Path path, FileSystem fs) throws IOException {
+    if (fs instanceof WithErasureCoding && fs.hasPathCapability(path,
+        Options.OpenFileOptions.FS_OPTION_OPENFILE_EC_POLICY)) {
+      return true;
+    }
+    LOG.warn("FS with scheme " + fs.getScheme() + " does not support EC");

Review Comment:
   use {} to insert strings into the log message



##########
hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/RetriableFileCopyCommand.java:
##########
@@ -402,6 +403,19 @@ private static long getBlockSize(
         .getDefaultBlockSize(tmpTargetPath);
   }
 
+  /**
+   * Return true if the FS implements {@link WithErasureCoding} and
+   * supports EC_POLICY option in {@link Options.OpenFileOptions}
+   */
+  boolean checkFSSupportsEC(Path path, FileSystem fs) throws IOException {

Review Comment:
   reuse the to-be-shared static implementation.



##########
hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/TestDistCpWithRawXAttrs.java:
##########
@@ -240,6 +255,107 @@ public void testPreserveAndNoPreserveEC() throws 
Exception {
     dfs.unsetErasureCodingPolicy(dir1);
   }
 
+
+  @Test
+  public void testPreserveECAcrossFilesystems() throws  Exception{
+    // set EC policy on source (HDFS)
+    String[] args = {"-setPolicy", "-path", dir1.toString(),
+        "-policy", "XOR-2-1-1024k"};
+    fs.delete(new Path("/dest"), true);
+    fs.mkdirs(subDir1);
+    DistributedFileSystem dfs = (DistributedFileSystem) fs;
+    dfs.enableErasureCodingPolicy("XOR-2-1-1024k");
+    dfs.setErasureCodingPolicy(dir1, "XOR-2-1-1024k");
+    fs.create(file1).close();
+    int res = ToolRunner.run(conf, new ECAdmin(conf), args);
+    assertEquals("Unable to set EC policy on " + subDir1.toString(), res, 0);
+    String src = "/src/*";
+    Path dest = new Path(TEST_ROOT_DIR, "dest");
+    final Path dest2Dir1 = new Path(dest, "dir1");
+    final Path dest2SubDir1 = new Path(dest2Dir1, "subdir1");
+
+    // copy source(HDFS) to target(DummyECFS) with preserveEC
+
+    try (DummyEcFs dummyEcFs = 
(DummyEcFs)FileSystem.get(URI.create("file:///"), conf)) {
+      Path target = dummyEcFs.makeQualified(dest);
+      DistCpTestUtils.assertRunDistCp(DistCpConstants.SUCCESS, src, 
target.toString(),
+          "-pe", conf);
+      try {
+        FileStatus destDir1Status = dummyEcFs.getFileStatus(dest2Dir1);
+        FileStatus destSubDir1Status = dummyEcFs.getFileStatus(dest2SubDir1);
+        assertNotNull(destDir1Status);

Review Comment:
   for all these assertions, add a useful string to be thrown on failure. 
   
   your goal should be "jenkins test results SHOULD be meaningful"



##########
hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/TestDistCpWithRawXAttrs.java:
##########
@@ -240,6 +255,107 @@ public void testPreserveAndNoPreserveEC() throws 
Exception {
     dfs.unsetErasureCodingPolicy(dir1);
   }
 
+
+  @Test
+  public void testPreserveECAcrossFilesystems() throws  Exception{
+    // set EC policy on source (HDFS)
+    String[] args = {"-setPolicy", "-path", dir1.toString(),
+        "-policy", "XOR-2-1-1024k"};
+    fs.delete(new Path("/dest"), true);
+    fs.mkdirs(subDir1);
+    DistributedFileSystem dfs = (DistributedFileSystem) fs;
+    dfs.enableErasureCodingPolicy("XOR-2-1-1024k");
+    dfs.setErasureCodingPolicy(dir1, "XOR-2-1-1024k");
+    fs.create(file1).close();
+    int res = ToolRunner.run(conf, new ECAdmin(conf), args);
+    assertEquals("Unable to set EC policy on " + subDir1.toString(), res, 0);
+    String src = "/src/*";
+    Path dest = new Path(TEST_ROOT_DIR, "dest");
+    final Path dest2Dir1 = new Path(dest, "dir1");
+    final Path dest2SubDir1 = new Path(dest2Dir1, "subdir1");
+
+    // copy source(HDFS) to target(DummyECFS) with preserveEC
+
+    try (DummyEcFs dummyEcFs = 
(DummyEcFs)FileSystem.get(URI.create("file:///"), conf)) {
+      Path target = dummyEcFs.makeQualified(dest);
+      DistCpTestUtils.assertRunDistCp(DistCpConstants.SUCCESS, src, 
target.toString(),
+          "-pe", conf);
+      try {
+        FileStatus destDir1Status = dummyEcFs.getFileStatus(dest2Dir1);
+        FileStatus destSubDir1Status = dummyEcFs.getFileStatus(dest2SubDir1);
+        assertNotNull(destDir1Status);
+        assertNotNull(destSubDir1Status);
+        // check if target paths are erasure coded.
+        assertTrue(dummyEcFs.isPathErasureCoded(destDir1Status.getPath()));
+        assertTrue(dummyEcFs.isPathErasureCoded(destSubDir1Status.getPath()));
+
+        // copy source(DummyECFS) to target (HDFS)
+        String dfsTarget = "/dest";
+        DistCpTestUtils.assertRunDistCp(DistCpConstants.SUCCESS,
+            target.toString(), dfsTarget, "-pe", conf);
+        Path dfsTargetPath = new Path(dfsTarget);
+        Path dfsTargetDir1 = new Path(dfsTarget, "dir1");
+        assertTrue(fs.exists(dfsTargetPath));

Review Comment:
   use `ContractTestUtils.assertPathExists()` on these two lines



##########
hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/TestDistCpWithRawXAttrs.java:
##########
@@ -240,6 +255,107 @@ public void testPreserveAndNoPreserveEC() throws 
Exception {
     dfs.unsetErasureCodingPolicy(dir1);
   }
 
+
+  @Test
+  public void testPreserveECAcrossFilesystems() throws  Exception{
+    // set EC policy on source (HDFS)
+    String[] args = {"-setPolicy", "-path", dir1.toString(),
+        "-policy", "XOR-2-1-1024k"};
+    fs.delete(new Path("/dest"), true);
+    fs.mkdirs(subDir1);
+    DistributedFileSystem dfs = (DistributedFileSystem) fs;
+    dfs.enableErasureCodingPolicy("XOR-2-1-1024k");
+    dfs.setErasureCodingPolicy(dir1, "XOR-2-1-1024k");
+    fs.create(file1).close();
+    int res = ToolRunner.run(conf, new ECAdmin(conf), args);
+    assertEquals("Unable to set EC policy on " + subDir1.toString(), res, 0);
+    String src = "/src/*";
+    Path dest = new Path(TEST_ROOT_DIR, "dest");
+    final Path dest2Dir1 = new Path(dest, "dir1");
+    final Path dest2SubDir1 = new Path(dest2Dir1, "subdir1");
+
+    // copy source(HDFS) to target(DummyECFS) with preserveEC
+
+    try (DummyEcFs dummyEcFs = 
(DummyEcFs)FileSystem.get(URI.create("file:///"), conf)) {
+      Path target = dummyEcFs.makeQualified(dest);
+      DistCpTestUtils.assertRunDistCp(DistCpConstants.SUCCESS, src, 
target.toString(),
+          "-pe", conf);
+      try {
+        FileStatus destDir1Status = dummyEcFs.getFileStatus(dest2Dir1);
+        FileStatus destSubDir1Status = dummyEcFs.getFileStatus(dest2SubDir1);
+        assertNotNull(destDir1Status);
+        assertNotNull(destSubDir1Status);
+        // check if target paths are erasure coded.
+        assertTrue(dummyEcFs.isPathErasureCoded(destDir1Status.getPath()));
+        assertTrue(dummyEcFs.isPathErasureCoded(destSubDir1Status.getPath()));
+
+        // copy source(DummyECFS) to target (HDFS)
+        String dfsTarget = "/dest";
+        DistCpTestUtils.assertRunDistCp(DistCpConstants.SUCCESS,
+            target.toString(), dfsTarget, "-pe", conf);
+        Path dfsTargetPath = new Path(dfsTarget);
+        Path dfsTargetDir1 = new Path(dfsTarget, "dir1");
+        assertTrue(fs.exists(dfsTargetPath));
+        assertTrue(fs.exists(dfsTargetDir1));
+        assertTrue(fs.getFileStatus(dfsTargetDir1).isErasureCoded());

Review Comment:
   include the filestatus string value in the error text to raise here



##########
hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/HdfsFileStatus.java:
##########
@@ -494,6 +495,14 @@ default FileStatus makeQualified(URI defaultUri, Path 
parent) {
 
   String getNamespace();
 
+  /**

Review Comment:
   again, no. these things are just too important to make changes to just for 
one test.



##########
hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/RetriableDirectoryCreateCommand.java:
##########
@@ -66,12 +75,32 @@ protected Object doExecute(Object... arguments) throws 
Exception {
     boolean preserveEC = getFileAttributeSettings(context)
         .contains(DistCpOptions.FileAttribute.ERASURECODINGPOLICY);
     if (preserveEC && sourceStatus.isErasureCoded()
-        && targetFS instanceof DistributedFileSystem) {
-      ErasureCodingPolicy ecPolicy =
-          ((HdfsFileStatus) sourceStatus).getErasureCodingPolicy();
-      DistributedFileSystem dfs = (DistributedFileSystem) targetFS;
-      dfs.setErasureCodingPolicy(target, ecPolicy.getName());
+        && checkFSSupportsEC(sourceStatus.getPath(), sourceFs)
+        && checkFSSupportsEC(target, targetFS)) {
+      ErasureCodingPolicy ecPolicy = SystemErasureCodingPolicies.getByName(
+          ((WithErasureCoding) sourceFs).getErasureCodingPolicyName(
+              sourceStatus));
+      if (LOG.isDebugEnabled()) {
+        LOG.debug("EC Policy for source path is {}", ecPolicy);
+      }
+      WithErasureCoding ecFs =  (WithErasureCoding) targetFS;
+      if (ecPolicy != null) {
+        ecFs.setErasureCodingPolicy(target, ecPolicy.getName());
+      }
     }
     return true;
   }
+
+  /**
+   * Return true if the FS implements {@link WithErasureCoding} and
+   * supports EC_POLICY option in {@link Options.OpenFileOptions}
+   */
+  boolean checkFSSupportsEC(Path path, FileSystem fs) throws IOException {

Review Comment:
   1. pull out and make static so it can be used in later classes
   2. swap filesystem and path arguments as that's common across the codebase





> Distcp of EC files should not be limited to DFS.
> ------------------------------------------------
>
>                 Key: HDFS-17381
>                 URL: https://issues.apache.org/jira/browse/HDFS-17381
>             Project: Hadoop HDFS
>          Issue Type: Bug
>          Components: distcp
>            Reporter: Sadanand Shenoy
>            Assignee: Sadanand Shenoy
>            Priority: Major
>              Labels: pull-request-available
>
> Currently, EC file support in distcp is limited to DFS: the code checks if 
> the given FS instance is a DistributedFileSystem. EC is now supported in 
> Ozone, so this limitation can be removed, and a general contract for any 
> filesystem that supports EC files should be allowed by implementing a few 
> interfaces/methods.



--
This message was sent by Atlassian Jira
(v8.20.10#820010)

---------------------------------------------------------------------
To unsubscribe, e-mail: hdfs-issues-unsubscr...@hadoop.apache.org
For additional commands, e-mail: hdfs-issues-h...@hadoop.apache.org

Reply via email to