Repository: hive Updated Branches: refs/heads/master fb3b81e3d -> 0d49b3684
HIVE-15280: Hive.mvFile() misses the "." char when joining the filename + extension (Sergio Pena, reviewed by Sahil Takiar and Yongzhi Chen) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/0d49b368 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/0d49b368 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/0d49b368 Branch: refs/heads/master Commit: 0d49b3684943eb9897cadd0ab9313d64a5903730 Parents: fb3b81e Author: Sergio Pena <[email protected]> Authored: Tue Nov 29 09:10:41 2016 -0600 Committer: Sergio Pena <[email protected]> Committed: Tue Nov 29 09:10:41 2016 -0600 ---------------------------------------------------------------------- .../apache/hadoop/hive/ql/metadata/Hive.java | 4 +- .../hive/ql/metadata/TestHiveCopyFiles.java | 223 +++++++++++++++++++ 2 files changed, 225 insertions(+), 2 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/0d49b368/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java index d912f6a..6627587 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java @@ -2934,14 +2934,14 @@ private void constructOneLBLocationMap(FileStatus fSta, int counter = 1; if (!isRenameAllowed || isBlobStoragePath) { while (destFs.exists(destFilePath)) { - destFilePath = new Path(destDirPath, name + ("_copy_" + counter) + type); + destFilePath = new Path(destDirPath, name + ("_copy_" + counter) + (!type.isEmpty() ? "." + type : "")); counter++; } } if (isRenameAllowed) { while (!destFs.rename(sourcePath, destFilePath)) { - destFilePath = new Path(destDirPath, name + ("_copy_" + counter) + type); + destFilePath = new Path(destDirPath, name + ("_copy_" + counter) + (!type.isEmpty() ? "." + type : "")); counter++; } } else if (isSrcLocal) { http://git-wip-us.apache.org/repos/asf/hive/blob/0d49b368/ql/src/test/org/apache/hadoop/hive/ql/metadata/TestHiveCopyFiles.java ---------------------------------------------------------------------- diff --git a/ql/src/test/org/apache/hadoop/hive/ql/metadata/TestHiveCopyFiles.java b/ql/src/test/org/apache/hadoop/hive/ql/metadata/TestHiveCopyFiles.java new file mode 100644 index 0000000..0c1c230 --- /dev/null +++ b/ql/src/test/org/apache/hadoop/hive/ql/metadata/TestHiveCopyFiles.java @@ -0,0 +1,223 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.metadata; + +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.ql.session.SessionState; +import org.junit.BeforeClass; +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.TemporaryFolder; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; +import org.mockito.Mockito; + +import java.io.IOException; +import java.net.URI; +import java.util.Arrays; +import java.util.List; + +import static org.junit.Assert.assertTrue; + + +@RunWith(Parameterized.class) +public class TestHiveCopyFiles { + private static boolean LOCAL_SOURCE = true; + private static boolean NO_ACID = false; + + private static HiveConf hiveConf; + + private boolean isSourceLocal; + + @Rule + public TemporaryFolder sourceFolder = new TemporaryFolder(); + + @Rule + public TemporaryFolder targetFolder = new TemporaryFolder(); + + @Parameterized.Parameters(name = "{0}") + public static List<Object[]> getParameters() throws Exception { + return Arrays.asList(new Object[][] { + { 0, LOCAL_SOURCE}, { 15, LOCAL_SOURCE}, + { 0, !LOCAL_SOURCE}, { 15, !LOCAL_SOURCE} + }); + } + + @BeforeClass + public static void setUp() { + hiveConf = new HiveConf(TestHiveCopyFiles.class); + SessionState.start(hiveConf); + } + + public TestHiveCopyFiles(int threadCount, boolean isSourceLocal) { + hiveConf.setIntVar(HiveConf.ConfVars.HIVE_MOVE_FILES_THREAD_COUNT, threadCount); + this.isSourceLocal = isSourceLocal; + } + + @Test + public void testRenameNewFilesOnSameFileSystem() throws IOException { + Path sourcePath = new Path(sourceFolder.getRoot().getAbsolutePath()); + sourceFolder.newFile("000000_0"); + sourceFolder.newFile("000001_0"); + sourceFolder.newFile("000000_0.gz"); + sourceFolder.newFile("000001_0.gz"); + + Path targetPath = new Path(targetFolder.getRoot().getAbsolutePath()); + FileSystem targetFs = targetPath.getFileSystem(hiveConf); + + try { + Hive.copyFiles(hiveConf, sourcePath, targetPath, targetFs, isSourceLocal, NO_ACID, null); + } catch (HiveException e) { + e.printStackTrace(); + assertTrue("Hive.copyFiles() threw an unexpected exception.", false); + } + + assertTrue(targetFs.exists(new Path(targetPath, "000000_0"))); + assertTrue(targetFs.exists(new Path(targetPath, "000001_0"))); + assertTrue(targetFs.exists(new Path(targetPath, "000000_0.gz"))); + assertTrue(targetFs.exists(new Path(targetPath, "000001_0.gz"))); + } + + @Test + public void testRenameExistingFilesOnSameFileSystem() throws IOException { + Path sourcePath = new Path(sourceFolder.getRoot().getAbsolutePath()); + sourceFolder.newFile("000000_0"); + sourceFolder.newFile("000001_0"); + sourceFolder.newFile("000000_0.gz"); + sourceFolder.newFile("000001_0.gz"); + + Path targetPath = new Path(targetFolder.getRoot().getAbsolutePath()); + FileSystem targetFs = targetPath.getFileSystem(hiveConf); + + try { + Hive.copyFiles(hiveConf, sourcePath, targetPath, targetFs, isSourceLocal, NO_ACID, null); + } catch (HiveException e) { + e.printStackTrace(); + assertTrue("Hive.copyFiles() threw an unexpected exception.", false); + } + + // If source is local, then source files won't be deleted, and we have to delete them here + if (isSourceLocal) { + sourceFolder.delete(); + sourceFolder.create(); + sourcePath = new Path(sourceFolder.getRoot().getAbsolutePath()); + } + + /* Create new source files with same filenames */ + sourceFolder.newFile("000000_0"); + sourceFolder.newFile("000001_0"); + sourceFolder.newFile("000000_0.gz"); + sourceFolder.newFile("000001_0.gz"); + + try { + Hive.copyFiles(hiveConf, sourcePath, targetPath, targetFs, isSourceLocal, NO_ACID, null); + } catch (HiveException e) { + e.printStackTrace(); + assertTrue("Hive.copyFiles() threw an unexpected exception.", false); + } + + assertTrue(targetFs.exists(new Path(targetPath, "000000_0"))); + assertTrue(targetFs.exists(new Path(targetPath, "000001_0"))); + assertTrue(targetFs.exists(new Path(targetPath, "000000_0.gz"))); + assertTrue(targetFs.exists(new Path(targetPath, "000001_0.gz"))); + assertTrue(targetFs.exists(new Path(targetPath, "000000_0_copy_1"))); + assertTrue(targetFs.exists(new Path(targetPath, "000001_0_copy_1"))); + assertTrue(targetFs.exists(new Path(targetPath, "000000_0_copy_1.gz"))); + assertTrue(targetFs.exists(new Path(targetPath, "000001_0_copy_1.gz"))); + } + + @Test + public void testCopyNewFilesOnDifferentFileSystem() throws IOException { + Path sourcePath = new Path(sourceFolder.getRoot().getAbsolutePath()); + sourceFolder.newFile("000000_0"); + sourceFolder.newFile("000001_0"); + sourceFolder.newFile("000000_0.gz"); + sourceFolder.newFile("000001_0.gz"); + + Path targetPath = new Path(targetFolder.getRoot().getAbsolutePath()); + + // Simulate different filesystems by returning a different URI + FileSystem spyTargetFs = Mockito.spy(targetPath.getFileSystem(hiveConf)); + Mockito.when(spyTargetFs.getUri()).thenReturn(URI.create("hdfs://" + targetPath.toUri().getPath())); + + try { + Hive.copyFiles(hiveConf, sourcePath, targetPath, spyTargetFs, isSourceLocal, NO_ACID, null); + } catch (HiveException e) { + e.printStackTrace(); + assertTrue("Hive.copyFiles() threw an unexpected exception.", false); + } + + assertTrue(spyTargetFs.exists(new Path(targetPath, "000000_0"))); + assertTrue(spyTargetFs.exists(new Path(targetPath, "000001_0"))); + assertTrue(spyTargetFs.exists(new Path(targetPath, "000000_0.gz"))); + assertTrue(spyTargetFs.exists(new Path(targetPath, "000001_0.gz"))); + } + + @Test + public void testCopyExistingFilesOnDifferentFileSystem() throws IOException { + Path sourcePath = new Path(sourceFolder.getRoot().getAbsolutePath()); + sourceFolder.newFile("000000_0"); + sourceFolder.newFile("000001_0"); + sourceFolder.newFile("000000_0.gz"); + sourceFolder.newFile("000001_0.gz"); + + Path targetPath = new Path(targetFolder.getRoot().getAbsolutePath()); + + // Simulate different filesystems by returning a different URI + FileSystem spyTargetFs = Mockito.spy(targetPath.getFileSystem(hiveConf)); + Mockito.when(spyTargetFs.getUri()).thenReturn(URI.create("hdfs://" + targetPath.toUri().getPath())); + + try { + Hive.copyFiles(hiveConf, sourcePath, targetPath, spyTargetFs, isSourceLocal, NO_ACID, null); + } catch (HiveException e) { + e.printStackTrace(); + assertTrue("Hive.copyFiles() threw an unexpected exception.", false); + } + + // If source is local, then source files won't be deleted, and we have to delete them here + if (isSourceLocal) { + sourceFolder.delete(); + sourceFolder.create(); + sourcePath = new Path(sourceFolder.getRoot().getAbsolutePath()); + } + + /* Create new source files with same filenames */ + sourceFolder.newFile("000000_0"); + sourceFolder.newFile("000001_0"); + sourceFolder.newFile("000000_0.gz"); + sourceFolder.newFile("000001_0.gz"); + + try { + Hive.copyFiles(hiveConf, sourcePath, targetPath, spyTargetFs, isSourceLocal, NO_ACID, null); + } catch (HiveException e) { + e.printStackTrace(); + assertTrue("Hive.copyFiles() threw an unexpected exception.", false); + } + + assertTrue(spyTargetFs.exists(new Path(targetPath, "000000_0"))); + assertTrue(spyTargetFs.exists(new Path(targetPath, "000001_0"))); + assertTrue(spyTargetFs.exists(new Path(targetPath, "000000_0.gz"))); + assertTrue(spyTargetFs.exists(new Path(targetPath, "000001_0.gz"))); + assertTrue(spyTargetFs.exists(new Path(targetPath, "000000_0_copy_1"))); + assertTrue(spyTargetFs.exists(new Path(targetPath, "000001_0_copy_1"))); + assertTrue(spyTargetFs.exists(new Path(targetPath, "000000_0_copy_1.gz"))); + assertTrue(spyTargetFs.exists(new Path(targetPath, "000001_0_copy_1.gz"))); + } +}
