Repository: hive Updated Branches: refs/heads/master a4dd84b38 -> d50aefb59
HIVE-20476: CopyUtils used by REPL LOAD and EXPORT/IMPORT operations ignore distcp error (Sankar Hariappan, reviewed by Mahesh Kumar Behera, Thejas M Nair) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/d50aefb5 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/d50aefb5 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/d50aefb5 Branch: refs/heads/master Commit: d50aefb594ad3baca7ca9fd9e83cfb0e860a28c5 Parents: a4dd84b Author: Sankar Hariappan <[email protected]> Authored: Tue Sep 4 15:50:40 2018 +0530 Committer: Sankar Hariappan <[email protected]> Committed: Tue Sep 4 15:50:40 2018 +0530 ---------------------------------------------------------------------- .../apache/hadoop/hive/ql/metadata/Hive.java | 7 +- .../hadoop/hive/ql/parse/repl/CopyUtils.java | 10 +-- .../hive/ql/parse/repl/TestCopyUtils.java | 69 ++++++++++++++++++-- 3 files changed, 75 insertions(+), 11 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/d50aefb5/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java index 08a4506..00d3d0d 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java @@ -3757,10 +3757,13 @@ private void constructOneLBLocationMap(FileStatus fSta, } else if (isSrcLocal) { destFs.copyFromLocalFile(sourcePath, destFilePath); } else { - FileUtils.copy(sourceFs, sourcePath, destFs, destFilePath, + if (!FileUtils.copy(sourceFs, sourcePath, destFs, destFilePath, true, // delete source false, // overwrite destination - conf); + conf)) { + LOG.error("Copy failed for source: " + sourcePath + " to destination: " + destFilePath); + throw new IOException("File copy failed."); + } } return destFilePath; } http://git-wip-us.apache.org/repos/asf/hive/blob/d50aefb5/ql/src/java/org/apache/hadoop/hive/ql/parse/repl/CopyUtils.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/repl/CopyUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/repl/CopyUtils.java index 75dcaa3..461f558 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/repl/CopyUtils.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/repl/CopyUtils.java @@ -262,15 +262,17 @@ public class CopyUtils { RAW_RESERVED_VIRTUAL_PATH + destinationUri.getPath()); } - FileUtils.distCp( + if (!FileUtils.distCp( sourceFs, // source file system srcList, // list of source paths destination, false, usePrivilegedUser ? copyAsUser : null, hiveConf, - ShimLoader.getHadoopShims() - ); + ShimLoader.getHadoopShims())) { + LOG.error("Distcp failed to copy files: " + srcList + " to destination: " + destination); + throw new IOException("Distcp operation failed."); + } } private void doRegularCopyOnce(FileSystem sourceFs, List<Path> srcList, FileSystem destinationFs, @@ -319,7 +321,7 @@ public class CopyUtils { 3. aggregate fileSize of all source Paths(can be directory / file) is less than configured size. 4. number of files of all source Paths(can be directory / file) is less than configured size. */ - private boolean regularCopy(FileSystem destinationFs, FileSystem sourceFs, List<ReplChangeManager.FileInfo> fileList) + boolean regularCopy(FileSystem destinationFs, FileSystem sourceFs, List<ReplChangeManager.FileInfo> fileList) throws IOException { if (hiveInTest) { return true; http://git-wip-us.apache.org/repos/asf/hive/blob/d50aefb5/ql/src/test/org/apache/hadoop/hive/ql/parse/repl/TestCopyUtils.java ---------------------------------------------------------------------- diff --git a/ql/src/test/org/apache/hadoop/hive/ql/parse/repl/TestCopyUtils.java b/ql/src/test/org/apache/hadoop/hive/ql/parse/repl/TestCopyUtils.java index 8714660..7bd660b 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/parse/repl/TestCopyUtils.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/parse/repl/TestCopyUtils.java @@ -18,30 +18,89 @@ package org.apache.hadoop.hive.ql.parse.repl; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hive.common.FileUtils; import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.metastore.ReplChangeManager; +import org.apache.hadoop.hive.shims.ShimLoader; +import org.apache.hadoop.hive.shims.Utils; +import org.apache.hadoop.security.UserGroupInformation; import org.junit.Test; +import org.junit.runner.RunWith; +import org.mockito.Mockito; +import org.powermock.core.classloader.annotations.PowerMockIgnore; +import org.powermock.core.classloader.annotations.PrepareForTest; +import org.powermock.modules.junit4.PowerMockRunner; + +import java.io.IOException; +import java.util.Arrays; +import java.util.List; import static org.junit.Assert.assertFalse; +import static org.mockito.Matchers.anyBoolean; +import static org.mockito.Matchers.anyListOf; +import static org.mockito.Matchers.eq; +import static org.mockito.Matchers.same; +import static org.mockito.Mockito.doReturn; +import static org.mockito.Mockito.mock; +import static org.powermock.api.mockito.PowerMockito.mockStatic; +import static org.powermock.api.mockito.PowerMockito.when; +/** + * Unit Test class for CopyUtils class. + */ +@RunWith(PowerMockRunner.class) +@PrepareForTest({ CopyUtils.class, FileUtils.class, Utils.class, UserGroupInformation.class}) +@PowerMockIgnore({ "javax.management.*" }) public class TestCopyUtils { /* Distcp currently does not copy a single file in a distributed manner hence we dont care about the size of file, if there is only file, we dont want to launch distcp. */ @Test - public void distcpShouldNotBeCalledOnlyForOneFile() { - HiveConf conf = new HiveConf(); - conf.setLongVar(HiveConf.ConfVars.HIVE_EXEC_COPYFILE_MAXSIZE, 1); + public void distcpShouldNotBeCalledOnlyForOneFile() throws Exception { + mockStatic(UserGroupInformation.class); + when(UserGroupInformation.getCurrentUser()).thenReturn(mock(UserGroupInformation.class)); + + HiveConf conf = Mockito.spy(new HiveConf()); + doReturn(1L).when(conf).getLong(HiveConf.ConfVars.HIVE_EXEC_COPYFILE_MAXSIZE.varname, 32L * 1024 * 1024); CopyUtils copyUtils = new CopyUtils("", conf); long MB_128 = 128 * 1024 * 1024; assertFalse(copyUtils.limitReachedForLocalCopy(MB_128, 1L)); } @Test - public void distcpShouldNotBeCalledForSmallerFileSize() { - HiveConf conf = new HiveConf(); + public void distcpShouldNotBeCalledForSmallerFileSize() throws Exception { + mockStatic(UserGroupInformation.class); + when(UserGroupInformation.getCurrentUser()).thenReturn(mock(UserGroupInformation.class)); + + HiveConf conf = Mockito.spy(new HiveConf()); CopyUtils copyUtils = new CopyUtils("", conf); long MB_16 = 16 * 1024 * 1024; assertFalse(copyUtils.limitReachedForLocalCopy(MB_16, 100L)); } + + @Test(expected = IOException.class) + public void shouldThrowExceptionOnDistcpFailure() throws Exception { + Path destination = mock(Path.class); + Path source = mock(Path.class); + FileSystem fs = mock(FileSystem.class); + List<Path> srcPaths = Arrays.asList(source, source); + HiveConf conf = mock(HiveConf.class); + CopyUtils copyUtils = Mockito.spy(new CopyUtils(null, conf)); + + mockStatic(FileUtils.class); + mockStatic(Utils.class); + when(destination.getFileSystem(same(conf))).thenReturn(fs); + when(source.getFileSystem(same(conf))).thenReturn(fs); + when(FileUtils.distCp(same(fs), anyListOf(Path.class), same(destination), + anyBoolean(), eq(null), same(conf), + same(ShimLoader.getHadoopShims()))) + .thenReturn(false); + when(Utils.getUGI()).thenReturn(mock(UserGroupInformation.class)); + doReturn(false).when(copyUtils).regularCopy(same(fs), same(fs), anyListOf(ReplChangeManager.FileInfo.class)); + + copyUtils.doCopy(destination, srcPaths); + } } \ No newline at end of file
