Repository: hive Updated Branches: refs/heads/master 879026e93 -> 8d966ed1e
HIVE-14198: Refactor aux jar related code to make them more consistent (Aihua Xu, reviewed by Mohit Sabharwal) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/8d966ed1 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/8d966ed1 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/8d966ed1 Branch: refs/heads/master Commit: 8d966ed1ee7f934ec8d27756b3e3c8d174aff222 Parents: 879026e Author: Aihua Xu <aihu...@apache.org> Authored: Fri Jul 8 16:44:59 2016 -0400 Committer: Aihua Xu <aihu...@apache.org> Committed: Mon Jul 18 15:41:53 2016 -0400 ---------------------------------------------------------------------- .../apache/hadoop/hive/common/FileUtils.java | 56 +++++++++++++++++ .../org/apache/hadoop/hive/conf/HiveConf.java | 6 +- .../hadoop/hive/common/TestFileUtils.java | 66 ++++++++++++++++++++ .../hadoop/hive/hbase/HBaseTestSetup.java | 7 ++- .../apache/hadoop/hive/ql/exec/Utilities.java | 55 ---------------- .../ql/exec/spark/LocalHiveSparkClient.java | 3 +- .../ql/exec/spark/RemoteHiveSparkClient.java | 10 +-- .../hive/ql/exec/spark/SparkUtilities.java | 14 ----- .../hadoop/hive/ql/session/SessionState.java | 2 +- .../hadoop/hive/ql/exec/TestUtilities.java | 35 ----------- 10 files changed, 138 insertions(+), 116 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/8d966ed1/common/src/java/org/apache/hadoop/hive/common/FileUtils.java ---------------------------------------------------------------------- diff --git a/common/src/java/org/apache/hadoop/hive/common/FileUtils.java b/common/src/java/org/apache/hadoop/hive/common/FileUtils.java index 26ce26f..23fcc8a 100644 --- a/common/src/java/org/apache/hadoop/hive/common/FileUtils.java +++ b/common/src/java/org/apache/hadoop/hive/common/FileUtils.java @@ -27,6 +27,7 @@ import java.security.AccessControlException; import java.security.PrivilegedExceptionAction; import java.util.BitSet; import java.util.Collection; +import java.util.HashSet; import java.util.List; import java.util.Random; import java.util.Set; @@ -35,6 +36,7 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.FileUtil; +import org.apache.hadoop.fs.GlobFilter; import org.apache.hadoop.fs.LocalFileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.PathFilter; @@ -911,4 +913,58 @@ public final class FileUtils { return false; } + /** + * Get the URI of the path. Assume to be local file system if no scheme. + */ + public static URI getURI(String path) throws URISyntaxException { + if (path == null) { + return null; + } + + URI uri = new URI(path); + if (uri.getScheme() == null) { + // if no scheme in the path, we assume it's file on local fs. + uri = new File(path).toURI(); + } + + return uri; + } + + /** + * Given a path string, get all the jars from the folder or the files themselves. + * + * @param pathString the path string is the comma-separated path list + * @return the list of the file names in the format of URI formats. + */ + public static Set<String> getJarFilesByPath(String pathString, Configuration conf) { + Set<String> result = new HashSet<String>(); + if (pathString == null || org.apache.commons.lang.StringUtils.isBlank(pathString)) { + return result; + } + + String[] paths = pathString.split(","); + for(String path : paths) { + try { + Path p = new Path(getURI(path)); + FileSystem fs = p.getFileSystem(conf); + if (!fs.exists(p)) { + LOG.error("The jar file path " + path + " doesn't exist"); + continue; + } + if (fs.isDirectory(p)) { + // add all jar files under the folder + FileStatus[] files = fs.listStatus(p, new GlobFilter("*.jar")); + for(FileStatus file : files) { + result.add(file.getPath().toUri().toString()); + } + } else { + result.add(p.toUri().toString()); + } + } catch(URISyntaxException | IOException e) { + LOG.error("Invalid file path " + path, e); + } + } + return result; + } + } http://git-wip-us.apache.org/repos/asf/hive/blob/8d966ed1/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java ---------------------------------------------------------------------- diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java index 42f7d88..66203a5 100644 --- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java +++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java @@ -22,6 +22,7 @@ import com.google.common.base.Joiner; import org.apache.commons.lang.StringUtils; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.common.FileUtils; import org.apache.hadoop.hive.common.classification.InterfaceAudience; import org.apache.hadoop.hive.common.classification.InterfaceAudience.LimitedPrivate; import org.apache.hadoop.hive.conf.Validator.PatternSet; @@ -3756,7 +3757,7 @@ public class HiveConf extends Configuration { } if (auxJars == null) { - auxJars = this.get(ConfVars.HIVEAUXJARS.varname); + auxJars = StringUtils.join(FileUtils.getJarFilesByPath(this.get(ConfVars.HIVEAUXJARS.varname), this), ','); } if (getBoolVar(ConfVars.METASTORE_SCHEMA_VERIFICATION)) { @@ -4036,7 +4037,8 @@ public class HiveConf extends Configuration { } /** - * @param auxJars the auxJars to set + * Set the auxiliary jars. Used for unit tests only. + * @param auxJars the auxJars to set. */ public void setAuxJars(String auxJars) { this.auxJars = auxJars; http://git-wip-us.apache.org/repos/asf/hive/blob/8d966ed1/common/src/test/org/apache/hadoop/hive/common/TestFileUtils.java ---------------------------------------------------------------------- diff --git a/common/src/test/org/apache/hadoop/hive/common/TestFileUtils.java b/common/src/test/org/apache/hadoop/hive/common/TestFileUtils.java new file mode 100644 index 0000000..e9fcc13 --- /dev/null +++ b/common/src/test/org/apache/hadoop/hive/common/TestFileUtils.java @@ -0,0 +1,66 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.common; + +import java.io.File; +import java.io.IOException; +import java.util.Set; + +import org.apache.hadoop.hive.conf.HiveConf; +import org.junit.Assert; +import org.junit.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.google.common.collect.Sets; +import com.google.common.io.Files; + +import junit.framework.TestCase; + +public class TestFileUtils extends TestCase { + public static final Logger LOG = LoggerFactory.getLogger(TestFileUtils.class); + + @Test + public void testGetJarFilesByPath() { + HiveConf conf = new HiveConf(this.getClass()); + File tmpDir = Files.createTempDir(); + String jarFileName1 = tmpDir.getAbsolutePath() + File.separator + "a.jar"; + String jarFileName2 = tmpDir.getAbsolutePath() + File.separator + "b.jar"; + File jarFile1 = new File(jarFileName1); + try { + org.apache.commons.io.FileUtils.touch(jarFile1); + Set<String> jars = FileUtils.getJarFilesByPath(tmpDir.getAbsolutePath(), conf); + Assert.assertEquals(Sets.newHashSet("file://" + jarFileName1),jars); + + jars = FileUtils.getJarFilesByPath("/folder/not/exist", conf); + Assert.assertTrue(jars.isEmpty()); + + File jarFile2 = new File(jarFileName2); + org.apache.commons.io.FileUtils.touch(jarFile2); + String newPath = "file://" + jarFileName1 + "," + "file://" + jarFileName2 + ",/file/not/exist"; + jars = FileUtils.getJarFilesByPath(newPath, conf); + Assert.assertEquals(Sets.newHashSet("file://" + jarFileName1, "file://" + jarFileName2), jars); + } catch (IOException e) { + LOG.error("failed to copy file to reloading folder", e); + Assert.fail(e.getMessage()); + } finally { + org.apache.commons.io.FileUtils.deleteQuietly(tmpDir); + } + } +} http://git-wip-us.apache.org/repos/asf/hive/blob/8d966ed1/itests/util/src/main/java/org/apache/hadoop/hive/hbase/HBaseTestSetup.java ---------------------------------------------------------------------- diff --git a/itests/util/src/main/java/org/apache/hadoop/hive/hbase/HBaseTestSetup.java b/itests/util/src/main/java/org/apache/hadoop/hive/hbase/HBaseTestSetup.java index cee7158..4f8fa05 100644 --- a/itests/util/src/main/java/org/apache/hadoop/hive/hbase/HBaseTestSetup.java +++ b/itests/util/src/main/java/org/apache/hadoop/hive/hbase/HBaseTestSetup.java @@ -22,6 +22,7 @@ import java.io.IOException; import java.net.ServerSocket; import java.util.Arrays; +import org.apache.commons.lang.StringUtils; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hbase.HBaseConfiguration; import org.apache.hadoop.hbase.HColumnDescriptor; @@ -63,10 +64,10 @@ public class HBaseTestSetup { conf.set("hbase.master", hbaseCluster.getMaster().getServerName().getHostAndPort()); conf.set("hbase.zookeeper.property.clientPort", Integer.toString(zooKeeperPort)); String auxJars = conf.getAuxJars(); - auxJars = ((auxJars == null) ? "" : (auxJars + ",")) + "file:///" + auxJars = (StringUtils.isBlank(auxJars) ? "" : (auxJars + ",")) + "file://" + new JobConf(conf, HBaseConfiguration.class).getJar(); - auxJars += ",file:///" + new JobConf(conf, HBaseSerDe.class).getJar(); - auxJars += ",file:///" + new JobConf(conf, Watcher.class).getJar(); + auxJars += ",file://" + new JobConf(conf, HBaseSerDe.class).getJar(); + auxJars += ",file://" + new JobConf(conf, Watcher.class).getJar(); conf.setAuxJars(auxJars); } http://git-wip-us.apache.org/repos/asf/hive/blob/8d966ed1/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java index bc977a0..a376023 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java @@ -85,7 +85,6 @@ import org.apache.hadoop.fs.CommonConfigurationKeysPublic; import org.apache.hadoop.fs.ContentSummary; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.GlobFilter; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.PathFilter; import org.apache.hadoop.fs.permission.FsPermission; @@ -1742,60 +1741,6 @@ public final class Utilities { return oneurl; } - /** - * Get the URI of the path. Assume to be local file system if no scheme. - */ - public static URI getURI(String path) throws URISyntaxException { - if (path == null) { - return null; - } - - URI uri = new URI(path); - if (uri.getScheme() == null) { - // if no scheme in the path, we assume it's file on local fs. - uri = new File(path).toURI(); - } - - return uri; - } - - /** - * Given a path string, get all the jars from the folder or the files themselves. - * - * @param pathString the path string is the comma-separated path list - * @return the list of the file names in the format of URI formats. - */ - public static Set<String> getJarFilesByPath(String pathString, Configuration conf) { - Set<String> result = new HashSet<String>(); - if (pathString == null || StringUtils.isBlank(pathString)) { - return result; - } - - String[] paths = pathString.split(","); - for(String path : paths) { - try { - Path p = new Path(getURI(path)); - FileSystem fs = p.getFileSystem(conf); - if (!fs.exists(p)) { - LOG.error("The jar file path " + path + " doesn't exist"); - continue; - } - if (fs.isDirectory(p)) { - // add all jar files under the folder - FileStatus[] files = fs.listStatus(p, new GlobFilter("*.jar")); - for(FileStatus file : files) { - result.add(file.getPath().toUri().toString()); - } - } else { - result.add(p.toUri().toString()); - } - } catch(URISyntaxException | IOException e) { - LOG.error("Invalid file path " + path, e); - } - } - return result; - } - private static boolean useExistingClassLoader(ClassLoader cl) { if (!(cl instanceof UDFClassLoader)) { // Cannot use the same classloader if it is not an instance of {@code UDFClassLoader} http://git-wip-us.apache.org/repos/asf/hive/blob/8d966ed1/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/LocalHiveSparkClient.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/LocalHiveSparkClient.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/LocalHiveSparkClient.java index 0c0fe95..c75333d 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/LocalHiveSparkClient.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/LocalHiveSparkClient.java @@ -152,7 +152,8 @@ public class LocalHiveSparkClient implements HiveSparkClient { addJars((new JobConf(this.getClass())).getJar()); // add aux jars - addJars(HiveConf.getVar(conf, HiveConf.ConfVars.HIVEAUXJARS)); + addJars(conf.getAuxJars()); + addJars(SessionState.get() == null ? null : SessionState.get().getReloadableAuxJars()); // add added jars String addedJars = Utilities.getResourceFiles(conf, SessionState.ResourceType.JAR); http://git-wip-us.apache.org/repos/asf/hive/blob/8d966ed1/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/RemoteHiveSparkClient.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/RemoteHiveSparkClient.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/RemoteHiveSparkClient.java index 2b06a04..a9f70c4 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/RemoteHiveSparkClient.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/RemoteHiveSparkClient.java @@ -28,7 +28,6 @@ import java.net.URISyntaxException; import java.util.ArrayList; import java.util.List; import java.util.Map; -import java.util.Set; import java.util.concurrent.Future; import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeoutException; @@ -39,6 +38,7 @@ import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hive.common.FileUtils; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.conf.HiveConf.ConfVars; import org.apache.hadoop.hive.ql.Context; @@ -211,8 +211,8 @@ public class RemoteHiveSparkClient implements HiveSparkClient { addJars((new JobConf(this.getClass())).getJar()); // add aux jars - addJars(HiveConf.getVar(conf, HiveConf.ConfVars.HIVEAUXJARS)); - addJars(SessionState.get().getReloadableAuxJars()); + addJars(conf.getAuxJars()); + addJars(SessionState.get() == null ? null : SessionState.get().getReloadableAuxJars()); // add added jars String addedJars = Utilities.getResourceFiles(conf, SessionState.ResourceType.JAR); @@ -242,7 +242,7 @@ public class RemoteHiveSparkClient implements HiveSparkClient { private void addResources(String addedFiles) throws IOException { for (String addedFile : CSV_SPLITTER.split(Strings.nullToEmpty(addedFiles))) { try { - URI fileUri = SparkUtilities.getURI(addedFile); + URI fileUri = FileUtils.getURI(addedFile); if (fileUri != null && !localFiles.contains(fileUri)) { localFiles.add(fileUri); if (SparkUtilities.needUploadToHDFS(fileUri, sparkConf)) { @@ -259,7 +259,7 @@ public class RemoteHiveSparkClient implements HiveSparkClient { private void addJars(String addedJars) throws IOException { for (String addedJar : CSV_SPLITTER.split(Strings.nullToEmpty(addedJars))) { try { - URI jarUri = SparkUtilities.getURI(addedJar); + URI jarUri = FileUtils.getURI(addedJar); if (jarUri != null && !localJars.contains(jarUri)) { localJars.add(jarUri); if (SparkUtilities.needUploadToHDFS(jarUri, sparkConf)) { http://git-wip-us.apache.org/repos/asf/hive/blob/8d966ed1/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkUtilities.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkUtilities.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkUtilities.java index 5a6bef9..630b598 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkUtilities.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkUtilities.java @@ -65,20 +65,6 @@ public class SparkUtilities { return copy; } - public static URI getURI(String path) throws URISyntaxException { - if (path == null) { - return null; - } - - URI uri = new URI(path); - if (uri.getScheme() == null) { - // if no file schema in path, we assume it's file on local fs. - uri = new File(path).toURI(); - } - - return uri; - } - /** * Uploads a local file to HDFS * http://git-wip-us.apache.org/repos/asf/hive/blob/8d966ed1/ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java b/ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java index f983b82..03010ea 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java @@ -1109,7 +1109,7 @@ public class SessionState { return; } - Set<String> jarPaths = Utilities.getJarFilesByPath(renewableJarPath, sessionConf); + Set<String> jarPaths = FileUtils.getJarFilesByPath(renewableJarPath, sessionConf); // load jars under the hive.reloadable.aux.jars.path if(!jarPaths.isEmpty()){ http://git-wip-us.apache.org/repos/asf/hive/blob/8d966ed1/ql/src/test/org/apache/hadoop/hive/ql/exec/TestUtilities.java ---------------------------------------------------------------------- diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/TestUtilities.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/TestUtilities.java index 7150424..0786f9b 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/exec/TestUtilities.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/TestUtilities.java @@ -32,11 +32,8 @@ import java.util.ArrayList; import java.util.LinkedHashMap; import java.util.List; import java.util.Properties; -import java.util.Set; import java.util.UUID; -import org.apache.commons.io.FileUtils; - import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; @@ -59,19 +56,15 @@ import org.apache.hadoop.hive.ql.session.SessionState; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFFromUtcTimestamp; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; import org.apache.hadoop.mapred.JobConf; - import org.junit.Assert; import org.junit.Rule; import org.junit.Test; import org.junit.rules.TemporaryFolder; - import org.slf4j.Logger; import org.slf4j.LoggerFactory; import com.google.common.collect.ImmutableMap; import com.google.common.collect.Lists; -import com.google.common.collect.Sets; -import com.google.common.io.Files; public class TestUtilities { @Rule @@ -151,34 +144,6 @@ public class TestUtilities { } @Test - public void testGetJarFilesByPath() { - HiveConf conf = new HiveConf(this.getClass()); - File f = Files.createTempDir(); - String jarFileName1 = f.getAbsolutePath() + File.separator + "a.jar"; - String jarFileName2 = f.getAbsolutePath() + File.separator + "b.jar"; - File jarFile = new File(jarFileName1); - try { - FileUtils.touch(jarFile); - Set<String> jars = Utilities.getJarFilesByPath(f.getAbsolutePath(), conf); - Assert.assertEquals(Sets.newHashSet("file://" + jarFileName1),jars); - - jars = Utilities.getJarFilesByPath("/folder/not/exist", conf); - Assert.assertTrue(jars.isEmpty()); - - File jarFile2 = new File(jarFileName2); - FileUtils.touch(jarFile2); - String newPath = "file://" + jarFileName1 + "," + "file://" + jarFileName2 + ",/file/not/exist"; - jars = Utilities.getJarFilesByPath(newPath, conf); - Assert.assertEquals(Sets.newHashSet("file://" + jarFileName1, "file://" + jarFileName2), jars); - } catch (IOException e) { - LOG.error("failed to copy file to reloading folder", e); - Assert.fail(e.getMessage()); - } finally { - FileUtils.deleteQuietly(f); - } - } - - @Test public void testReplaceTaskId() { String taskID = "000000"; int bucketNum = 1;