This is an automated email from the ASF dual-hosted git repository.

tchoi pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
     new ae64e6de113 HIVE-27833: Hive Acid Replication Support for Dell Powerscale (#4841) (Harshal Patel, reviewed by Teddy Choi)
ae64e6de113 is described below

commit ae64e6de113d8eb7b7a350d1b4430f16c824244d
Author: harshal-16 <[email protected]>
AuthorDate: Wed Nov 22 06:44:27 2023 +0530

    HIVE-27833: Hive Acid Replication Support for Dell Powerscale (#4841) (Harshal Patel, reviewed by Teddy Choi)
---
 .../org/apache/hadoop/hive/common/FileUtils.java   | 24 +++++++++++++++++-----
 .../java/org/apache/hadoop/hive/conf/HiveConf.java |  2 ++
 .../apache/hadoop/hive/common/TestFileUtils.java   | 15 ++++++++++++++
 .../hadoop/hive/ql/parse/repl/CopyUtils.java       |  2 +-
 .../apache/hadoop/hive/shims/Hadoop23Shims.java    | 18 +++++++++++-----
 .../java/org/apache/hadoop/hive/shims/Utils.java   |  8 ++++++--
 6 files changed, 56 insertions(+), 13 deletions(-)

diff --git a/common/src/java/org/apache/hadoop/hive/common/FileUtils.java b/common/src/java/org/apache/hadoop/hive/common/FileUtils.java
index 18efe167a63..be994461f31 100644
--- a/common/src/java/org/apache/hadoop/hive/common/FileUtils.java
+++ b/common/src/java/org/apache/hadoop/hive/common/FileUtils.java
@@ -18,6 +18,8 @@
 
 package org.apache.hadoop.hive.common;
 
+import static org.apache.hadoop.hive.shims.Utils.RAW_RESERVED_VIRTUAL_PATH;
+
 import java.io.EOFException;
 import java.io.File;
 import java.io.FileNotFoundException;
@@ -61,11 +63,13 @@ import org.apache.hadoop.fs.FSDataOutputStream;
 import org.apache.hadoop.fs.PathExistsException;
 import org.apache.hadoop.fs.PathIsDirectoryException;
 import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
 import org.apache.hadoop.hive.shims.HadoopShims;
 import org.apache.hadoop.hive.shims.ShimLoader;
 import org.apache.hadoop.hive.shims.Utils;
 import org.apache.hadoop.io.IOUtils;
 import org.apache.hadoop.security.UserGroupInformation;
+import com.google.common.base.Preconditions;
 import org.apache.hadoop.util.StringUtils;
 import org.apache.hadoop.util.functional.RemoteIterators;
 import org.apache.hive.common.util.ShutdownHookManager;
@@ -767,7 +771,7 @@ public final class FileUtils {
       // is tried and it fails. We depend upon that behaviour in cases like replication,
       // wherein if distcp fails, there is good reason to not plod along with a trivial
       // implementation, and fail instead.
-      copied = doIOUtilsCopyBytes(srcFS, srcFS.getFileStatus(src), dstFS, dst, deleteSource, overwrite, shouldPreserveXAttrs(conf, srcFS, dstFS), conf, copyStatistics);
+      copied = doIOUtilsCopyBytes(srcFS, srcFS.getFileStatus(src), dstFS, dst, deleteSource, overwrite, shouldPreserveXAttrs(conf, srcFS, dstFS, src), conf, copyStatistics);
     }
     return copied;
   }
@@ -895,11 +899,21 @@ public final class FileUtils {
     }
   }
 
-  public static boolean shouldPreserveXAttrs(HiveConf conf, FileSystem srcFS, FileSystem dstFS) throws IOException {
-    if (!Utils.checkFileSystemXAttrSupport(srcFS) || !Utils.checkFileSystemXAttrSupport(dstFS)){
-      return false;
+  public static boolean shouldPreserveXAttrs(HiveConf conf, FileSystem srcFS, FileSystem dstFS, Path path) throws IOException {
+    Preconditions.checkNotNull(path);
+    if (conf.getBoolVar(ConfVars.DFS_XATTR_ONLY_SUPPORTED_ON_RESERVED_NAMESPACE)) {
+
+      if (!(path.toUri().getPath().startsWith(RAW_RESERVED_VIRTUAL_PATH)
+        && Utils.checkFileSystemXAttrSupport(srcFS, new Path(RAW_RESERVED_VIRTUAL_PATH))
+        && Utils.checkFileSystemXAttrSupport(dstFS, new Path(RAW_RESERVED_VIRTUAL_PATH)))) {
+        return false;
+      }
+    } else {
+      if (!Utils.checkFileSystemXAttrSupport(srcFS) || !Utils.checkFileSystemXAttrSupport(dstFS)) {
+        return false;
+      }
     }
-    for (Map.Entry<String,String> entry : conf.getPropsWithPrefix(Utils.DISTCP_OPTIONS_PREFIX).entrySet()) {
+    for (Map.Entry<String, String> entry : conf.getPropsWithPrefix(Utils.DISTCP_OPTIONS_PREFIX).entrySet()) {
       String distCpOption = entry.getKey();
       if (distCpOption.startsWith("p")) {
         return distCpOption.contains("x");
diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
index ec463178912..027bab6eb53 100644
--- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
+++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
@@ -487,6 +487,8 @@ public class HiveConf extends Configuration {
     MSC_CACHE_RECORD_STATS("hive.metastore.client.cache.v2.recordStats", false,
             "This property enables recording metastore client cache stats in DEBUG logs"),
     // QL execution stuff
+    DFS_XATTR_ONLY_SUPPORTED_ON_RESERVED_NAMESPACE("dfs.xattr.supported.only.on.reserved.namespace", false,
+      "DFS supports xattr only on Reserved Name space (/.reserved/raw)"),
     SCRIPTWRAPPER("hive.exec.script.wrapper", null, ""),
     PLAN("hive.exec.plan", "", ""),
     STAGINGDIR("hive.exec.stagingdir", ".hive-staging",
diff --git a/itests/hive-unit-hadoop2/src/test/java/org/apache/hadoop/hive/common/TestFileUtils.java b/itests/hive-unit-hadoop2/src/test/java/org/apache/hadoop/hive/common/TestFileUtils.java
index 732180158a2..807d4694fc0 100644
--- a/itests/hive-unit-hadoop2/src/test/java/org/apache/hadoop/hive/common/TestFileUtils.java
+++ b/itests/hive-unit-hadoop2/src/test/java/org/apache/hadoop/hive/common/TestFileUtils.java
@@ -133,6 +133,21 @@ public class TestFileUtils {
     verifyXAttrsPreserved(src, new Path(dst, src.getName()));
   }
 
+  @Test
+  public void testShouldPreserveXAttrs() throws Exception {
+    conf.setBoolean(HiveConf.ConfVars.DFS_XATTR_ONLY_SUPPORTED_ON_RESERVED_NAMESPACE.varname, true);
+    Path filePath = new Path(basePath, "src.txt");
+    fs.create(filePath).close();
+    Assert.assertFalse(FileUtils.shouldPreserveXAttrs(conf, fs, fs, filePath));
+    Path reservedRawPath = new Path("/.reserved/raw/", "src1.txt");
+    fs.create(reservedRawPath).close();
+    Assert.assertTrue(FileUtils.shouldPreserveXAttrs(conf, fs, fs, reservedRawPath));
+
+    conf.setBoolean(HiveConf.ConfVars.DFS_XATTR_ONLY_SUPPORTED_ON_RESERVED_NAMESPACE.varname, false);
+    Assert.assertTrue(FileUtils.shouldPreserveXAttrs(conf, fs, fs, filePath));
+    Assert.assertTrue(FileUtils.shouldPreserveXAttrs(conf, fs, fs, reservedRawPath));
+  }
+
+
   private void verifyXAttrsPreserved(Path src, Path dst) throws Exception {
     FileStatus srcStatus = fs.getFileStatus(src);
     FileStatus dstStatus = fs.getFileStatus(dst);
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/repl/CopyUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/repl/CopyUtils.java
index 4dc640ca73f..eb55b549fb8 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/repl/CopyUtils.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/repl/CopyUtils.java
@@ -127,7 +127,7 @@ public class CopyUtils {
                           Path dst, boolean deleteSource, boolean overwrite,
                           DataCopyStatistics copyStatistics) throws IOException {
     retryableFxn(() -> {
-      boolean preserveXAttrs = FileUtils.shouldPreserveXAttrs(hiveConf, srcFS, dstFS);
+      boolean preserveXAttrs = FileUtils.shouldPreserveXAttrs(hiveConf, srcFS, dstFS, paths[0]);
       FileUtils.copy(srcFS, paths, dstFS, dst, deleteSource, overwrite, preserveXAttrs, hiveConf,
           copyStatistics);
       return null;
diff --git a/shims/0.23/src/main/java/org/apache/hadoop/hive/shims/Hadoop23Shims.java b/shims/0.23/src/main/java/org/apache/hadoop/hive/shims/Hadoop23Shims.java
index 52e9d1f4503..0437417e4b4 100644
--- a/shims/0.23/src/main/java/org/apache/hadoop/hive/shims/Hadoop23Shims.java
+++ b/shims/0.23/src/main/java/org/apache/hadoop/hive/shims/Hadoop23Shims.java
@@ -108,6 +108,7 @@ import org.apache.tez.dag.api.TezConfiguration;
 import org.apache.tez.runtime.library.api.TezRuntimeConfiguration;
 import org.apache.tez.test.MiniTezCluster;
 
+import static org.apache.hadoop.hive.shims.Utils.RAW_RESERVED_VIRTUAL_PATH;
 import static org.apache.hadoop.tools.DistCpConstants.CONF_LABEL_DISTCP_JOB_ID;
 
 /**
@@ -1047,7 +1048,7 @@ public class Hadoop23Shims extends HadoopShimsSecure {
   List<String> constructDistCpParams(List<Path> srcPaths, Path dst, Configuration conf) throws IOException {
     // -update and -delete are mandatory options for directory copy to work.
     List<String> params = constructDistCpDefaultParams(conf, dst.getFileSystem(conf),
-            srcPaths.get(0).getFileSystem(conf));
+            srcPaths.get(0).getFileSystem(conf), srcPaths);
     if (!params.contains("-delete")) {
       params.add("-delete");
     }
@@ -1059,7 +1060,7 @@ public class Hadoop23Shims extends HadoopShimsSecure {
   }
 
   private List<String> constructDistCpDefaultParams(Configuration conf, FileSystem dstFs,
-                                                    FileSystem sourceFs) throws IOException {
+                                                    FileSystem sourceFs, List<Path> srcPaths) throws IOException {
     List<String> params = new ArrayList<String>();
     boolean needToAddPreserveOption = true;
     for (Map.Entry<String,String> entry : conf.getPropsWithPrefix(Utils.DISTCP_OPTIONS_PREFIX).entrySet()){
@@ -1074,8 +1075,15 @@ public class Hadoop23Shims extends HadoopShimsSecure {
       }
     }
     if (needToAddPreserveOption) {
-      params.add((Utils.checkFileSystemXAttrSupport(dstFs)
-              && Utils.checkFileSystemXAttrSupport(sourceFs)) ? "-pbx" : "-pb");
+      if (conf.getBoolean("dfs.xattr.supported.only.on.reserved.namespace", false)) {
+        boolean shouldCopyXAttrs =  srcPaths.get(0).toUri().getPath().startsWith(RAW_RESERVED_VIRTUAL_PATH)
+          && Utils.checkFileSystemXAttrSupport(sourceFs, new Path(RAW_RESERVED_VIRTUAL_PATH))
+          && Utils.checkFileSystemXAttrSupport(dstFs, new Path(RAW_RESERVED_VIRTUAL_PATH));
+        params.add(shouldCopyXAttrs ? "-pbx" : "-pb");
+      } else {
+        params.add((Utils.checkFileSystemXAttrSupport(dstFs)
+          && Utils.checkFileSystemXAttrSupport(sourceFs)) ? "-pbx" : "-pb");
+      }
     }
     if (!params.contains("-update")) {
       params.add("-update");
@@ -1097,7 +1105,7 @@ public class Hadoop23Shims extends HadoopShimsSecure {
       Configuration conf, String diff) throws IOException {
     // Get the default distcp params
     List<String> params = constructDistCpDefaultParams(conf, dst.getFileSystem(conf),
-            srcPaths.get(0).getFileSystem(conf));
+            srcPaths.get(0).getFileSystem(conf), srcPaths);
     if (params.contains("-delete")) {
       params.remove("-delete");
     }
diff --git a/shims/common/src/main/java/org/apache/hadoop/hive/shims/Utils.java b/shims/common/src/main/java/org/apache/hadoop/hive/shims/Utils.java
index 339f0b5e9c9..12566d8d93b 100644
--- a/shims/common/src/main/java/org/apache/hadoop/hive/shims/Utils.java
+++ b/shims/common/src/main/java/org/apache/hadoop/hive/shims/Utils.java
@@ -49,7 +49,7 @@ import org.slf4j.LoggerFactory;
 public class Utils {
 
   private static final Logger LOG = LoggerFactory.getLogger(Utils.class);
-
+  public static final String RAW_RESERVED_VIRTUAL_PATH = "/.reserved/raw/";
   private static final boolean IBM_JAVA = System.getProperty("java.vendor")
       .contains("IBM");
 
@@ -165,8 +165,12 @@ public class Utils {
   }
 
   public static boolean checkFileSystemXAttrSupport(FileSystem fs) throws IOException {
+    return checkFileSystemXAttrSupport(fs, new Path(Path.SEPARATOR));
+  }
+
+  public static boolean checkFileSystemXAttrSupport(FileSystem fs, Path path) throws IOException {
     try {
-      fs.getXAttrs(new Path(Path.SEPARATOR));
+      fs.getXAttrs(path);
       return true;
     } catch (UnsupportedOperationException e) {
       LOG.warn("XAttr won't be preserved since it is not supported for file system: " + fs.getUri());

Reply via email to