This is an automated email from the ASF dual-hosted git repository.
tchoi pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push:
new ae64e6de113 HIVE-27833: Hive Acid Replication Support for Dell Powerscale (#4841) (Harshal Patel, reviewed by Teddy Choi)
ae64e6de113 is described below
commit ae64e6de113d8eb7b7a350d1b4430f16c824244d
Author: harshal-16 <[email protected]>
AuthorDate: Wed Nov 22 06:44:27 2023 +0530
HIVE-27833: Hive Acid Replication Support for Dell Powerscale (#4841) (Harshal Patel, reviewed by Teddy Choi)
---
.../org/apache/hadoop/hive/common/FileUtils.java | 24 +++++++++++++++++-----
.../java/org/apache/hadoop/hive/conf/HiveConf.java | 2 ++
.../apache/hadoop/hive/common/TestFileUtils.java | 15 ++++++++++++++
.../hadoop/hive/ql/parse/repl/CopyUtils.java | 2 +-
.../apache/hadoop/hive/shims/Hadoop23Shims.java | 18 +++++++++++-----
.../java/org/apache/hadoop/hive/shims/Utils.java | 8 ++++++--
6 files changed, 56 insertions(+), 13 deletions(-)
diff --git a/common/src/java/org/apache/hadoop/hive/common/FileUtils.java b/common/src/java/org/apache/hadoop/hive/common/FileUtils.java
index 18efe167a63..be994461f31 100644
--- a/common/src/java/org/apache/hadoop/hive/common/FileUtils.java
+++ b/common/src/java/org/apache/hadoop/hive/common/FileUtils.java
@@ -18,6 +18,8 @@
package org.apache.hadoop.hive.common;
+import static org.apache.hadoop.hive.shims.Utils.RAW_RESERVED_VIRTUAL_PATH;
+
import java.io.EOFException;
import java.io.File;
import java.io.FileNotFoundException;
@@ -61,11 +63,13 @@ import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.PathExistsException;
import org.apache.hadoop.fs.PathIsDirectoryException;
import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
import org.apache.hadoop.hive.shims.HadoopShims;
import org.apache.hadoop.hive.shims.ShimLoader;
import org.apache.hadoop.hive.shims.Utils;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.security.UserGroupInformation;
+import com.google.common.base.Preconditions;
import org.apache.hadoop.util.StringUtils;
import org.apache.hadoop.util.functional.RemoteIterators;
import org.apache.hive.common.util.ShutdownHookManager;
@@ -767,7 +771,7 @@ public final class FileUtils {
// is tried and it fails. We depend upon that behaviour in cases like replication,
// wherein if distcp fails, there is good reason to not plod along with a trivial
// implementation, and fail instead.
- copied = doIOUtilsCopyBytes(srcFS, srcFS.getFileStatus(src), dstFS, dst, deleteSource, overwrite, shouldPreserveXAttrs(conf, srcFS, dstFS), conf, copyStatistics);
+ copied = doIOUtilsCopyBytes(srcFS, srcFS.getFileStatus(src), dstFS, dst, deleteSource, overwrite, shouldPreserveXAttrs(conf, srcFS, dstFS, src), conf, copyStatistics);
}
return copied;
}
@@ -895,11 +899,21 @@ public final class FileUtils {
}
}
- public static boolean shouldPreserveXAttrs(HiveConf conf, FileSystem srcFS, FileSystem dstFS) throws IOException {
- if (!Utils.checkFileSystemXAttrSupport(srcFS) || !Utils.checkFileSystemXAttrSupport(dstFS)){
- return false;
+ public static boolean shouldPreserveXAttrs(HiveConf conf, FileSystem srcFS, FileSystem dstFS, Path path) throws IOException {
+ Preconditions.checkNotNull(path);
+ if (conf.getBoolVar(ConfVars.DFS_XATTR_ONLY_SUPPORTED_ON_RESERVED_NAMESPACE)) {
+
+ if (!(path.toUri().getPath().startsWith(RAW_RESERVED_VIRTUAL_PATH)
+ && Utils.checkFileSystemXAttrSupport(srcFS, new Path(RAW_RESERVED_VIRTUAL_PATH))
+ && Utils.checkFileSystemXAttrSupport(dstFS, new Path(RAW_RESERVED_VIRTUAL_PATH)))) {
+ return false;
+ }
+ } else {
+ if (!Utils.checkFileSystemXAttrSupport(srcFS) || !Utils.checkFileSystemXAttrSupport(dstFS)) {
+ return false;
+ }
}
- for (Map.Entry<String,String> entry : conf.getPropsWithPrefix(Utils.DISTCP_OPTIONS_PREFIX).entrySet()) {
+ for (Map.Entry<String, String> entry : conf.getPropsWithPrefix(Utils.DISTCP_OPTIONS_PREFIX).entrySet()) {
String distCpOption = entry.getKey();
if (distCpOption.startsWith("p")) {
return distCpOption.contains("x");
diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
index ec463178912..027bab6eb53 100644
--- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
+++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
@@ -487,6 +487,8 @@ public class HiveConf extends Configuration {
MSC_CACHE_RECORD_STATS("hive.metastore.client.cache.v2.recordStats", false,
"This property enables recording metastore client cache stats in
DEBUG logs"),
// QL execution stuff
+ DFS_XATTR_ONLY_SUPPORTED_ON_RESERVED_NAMESPACE("dfs.xattr.supported.only.on.reserved.namespace", false,
+ "DFS supports xattr only on Reserved Name space (/.reserved/raw)"),
SCRIPTWRAPPER("hive.exec.script.wrapper", null, ""),
PLAN("hive.exec.plan", "", ""),
STAGINGDIR("hive.exec.stagingdir", ".hive-staging",
diff --git a/itests/hive-unit-hadoop2/src/test/java/org/apache/hadoop/hive/common/TestFileUtils.java b/itests/hive-unit-hadoop2/src/test/java/org/apache/hadoop/hive/common/TestFileUtils.java
index 732180158a2..807d4694fc0 100644
--- a/itests/hive-unit-hadoop2/src/test/java/org/apache/hadoop/hive/common/TestFileUtils.java
+++ b/itests/hive-unit-hadoop2/src/test/java/org/apache/hadoop/hive/common/TestFileUtils.java
@@ -133,6 +133,21 @@ public class TestFileUtils {
verifyXAttrsPreserved(src, new Path(dst, src.getName()));
}
+ @Test
+ public void testShouldPreserveXAttrs() throws Exception {
+ conf.setBoolean(HiveConf.ConfVars.DFS_XATTR_ONLY_SUPPORTED_ON_RESERVED_NAMESPACE.varname, true);
+ Path filePath = new Path(basePath, "src.txt");
+ fs.create(filePath).close();
+ Assert.assertFalse(FileUtils.shouldPreserveXAttrs(conf, fs, fs, filePath));
+ Path reservedRawPath = new Path("/.reserved/raw/", "src1.txt");
+ fs.create(reservedRawPath).close();
+ Assert.assertTrue(FileUtils.shouldPreserveXAttrs(conf, fs, fs, reservedRawPath));
+
+ conf.setBoolean(HiveConf.ConfVars.DFS_XATTR_ONLY_SUPPORTED_ON_RESERVED_NAMESPACE.varname, false);
+ Assert.assertTrue(FileUtils.shouldPreserveXAttrs(conf, fs, fs, filePath));
+ Assert.assertTrue(FileUtils.shouldPreserveXAttrs(conf, fs, fs, reservedRawPath));
+ }
+
private void verifyXAttrsPreserved(Path src, Path dst) throws Exception {
FileStatus srcStatus = fs.getFileStatus(src);
FileStatus dstStatus = fs.getFileStatus(dst);
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/repl/CopyUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/repl/CopyUtils.java
index 4dc640ca73f..eb55b549fb8 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/repl/CopyUtils.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/repl/CopyUtils.java
@@ -127,7 +127,7 @@ public class CopyUtils {
Path dst, boolean deleteSource, boolean overwrite,
DataCopyStatistics copyStatistics) throws IOException {
retryableFxn(() -> {
- boolean preserveXAttrs = FileUtils.shouldPreserveXAttrs(hiveConf, srcFS, dstFS);
+ boolean preserveXAttrs = FileUtils.shouldPreserveXAttrs(hiveConf, srcFS, dstFS, paths[0]);
FileUtils.copy(srcFS, paths, dstFS, dst, deleteSource, overwrite, preserveXAttrs, hiveConf,
copyStatistics);
return null;
diff --git a/shims/0.23/src/main/java/org/apache/hadoop/hive/shims/Hadoop23Shims.java b/shims/0.23/src/main/java/org/apache/hadoop/hive/shims/Hadoop23Shims.java
index 52e9d1f4503..0437417e4b4 100644
--- a/shims/0.23/src/main/java/org/apache/hadoop/hive/shims/Hadoop23Shims.java
+++ b/shims/0.23/src/main/java/org/apache/hadoop/hive/shims/Hadoop23Shims.java
@@ -108,6 +108,7 @@ import org.apache.tez.dag.api.TezConfiguration;
import org.apache.tez.runtime.library.api.TezRuntimeConfiguration;
import org.apache.tez.test.MiniTezCluster;
+import static org.apache.hadoop.hive.shims.Utils.RAW_RESERVED_VIRTUAL_PATH;
import static org.apache.hadoop.tools.DistCpConstants.CONF_LABEL_DISTCP_JOB_ID;
/**
@@ -1047,7 +1048,7 @@ public class Hadoop23Shims extends HadoopShimsSecure {
List<String> constructDistCpParams(List<Path> srcPaths, Path dst, Configuration conf) throws IOException {
// -update and -delete are mandatory options for directory copy to work.
List<String> params = constructDistCpDefaultParams(conf, dst.getFileSystem(conf),
- srcPaths.get(0).getFileSystem(conf));
+ srcPaths.get(0).getFileSystem(conf), srcPaths);
if (!params.contains("-delete")) {
params.add("-delete");
}
@@ -1059,7 +1060,7 @@ public class Hadoop23Shims extends HadoopShimsSecure {
}
private List<String> constructDistCpDefaultParams(Configuration conf,
FileSystem dstFs,
- FileSystem sourceFs) throws IOException {
+ FileSystem sourceFs, List<Path> srcPaths) throws IOException {
List<String> params = new ArrayList<String>();
boolean needToAddPreserveOption = true;
for (Map.Entry<String,String> entry : conf.getPropsWithPrefix(Utils.DISTCP_OPTIONS_PREFIX).entrySet()){
@@ -1074,8 +1075,15 @@ public class Hadoop23Shims extends HadoopShimsSecure {
}
}
if (needToAddPreserveOption) {
- params.add((Utils.checkFileSystemXAttrSupport(dstFs)
- && Utils.checkFileSystemXAttrSupport(sourceFs)) ? "-pbx" : "-pb");
+ if (conf.getBoolean("dfs.xattr.supported.only.on.reserved.namespace", false)) {
+ boolean shouldCopyXAttrs = srcPaths.get(0).toUri().getPath().startsWith(RAW_RESERVED_VIRTUAL_PATH)
+ && Utils.checkFileSystemXAttrSupport(sourceFs, new Path(RAW_RESERVED_VIRTUAL_PATH))
+ && Utils.checkFileSystemXAttrSupport(dstFs, new Path(RAW_RESERVED_VIRTUAL_PATH));
+ params.add(shouldCopyXAttrs ? "-pbx" : "-pb");
+ } else {
+ params.add((Utils.checkFileSystemXAttrSupport(dstFs)
+ && Utils.checkFileSystemXAttrSupport(sourceFs)) ? "-pbx" : "-pb");
+ }
}
if (!params.contains("-update")) {
params.add("-update");
@@ -1097,7 +1105,7 @@ public class Hadoop23Shims extends HadoopShimsSecure {
Configuration conf, String diff) throws IOException {
// Get the default distcp params
List<String> params = constructDistCpDefaultParams(conf, dst.getFileSystem(conf),
- srcPaths.get(0).getFileSystem(conf));
+ srcPaths.get(0).getFileSystem(conf), srcPaths);
if (params.contains("-delete")) {
params.remove("-delete");
}
diff --git a/shims/common/src/main/java/org/apache/hadoop/hive/shims/Utils.java b/shims/common/src/main/java/org/apache/hadoop/hive/shims/Utils.java
index 339f0b5e9c9..12566d8d93b 100644
--- a/shims/common/src/main/java/org/apache/hadoop/hive/shims/Utils.java
+++ b/shims/common/src/main/java/org/apache/hadoop/hive/shims/Utils.java
@@ -49,7 +49,7 @@ import org.slf4j.LoggerFactory;
public class Utils {
private static final Logger LOG = LoggerFactory.getLogger(Utils.class);
-
+ public static final String RAW_RESERVED_VIRTUAL_PATH = "/.reserved/raw/";
private static final boolean IBM_JAVA = System.getProperty("java.vendor")
.contains("IBM");
@@ -165,8 +165,12 @@ public class Utils {
}
public static boolean checkFileSystemXAttrSupport(FileSystem fs) throws IOException {
+ return checkFileSystemXAttrSupport(fs, new Path(Path.SEPARATOR));
+ }
+
+ public static boolean checkFileSystemXAttrSupport(FileSystem fs, Path path) throws IOException {
try {
- fs.getXAttrs(new Path(Path.SEPARATOR));
+ fs.getXAttrs(path);
return true;
} catch (UnsupportedOperationException e) {
LOG.warn("XAttr won't be preserved since it is not supported for file
system: " + fs.getUri());