Author: ekoifman Date: Mon Mar 30 18:10:29 2015 New Revision: 1670162 URL: http://svn.apache.org/r1670162 Log: HIVE-10066 Hive on Tez job submission through WebHCat doesn't ship Tez artifacts (Eugene Koifman, reviewed by Thejas Nair)
Modified: hive/trunk/hcatalog/src/test/e2e/templeton/deployers/config/webhcat/webhcat-site.xml hive/trunk/hcatalog/src/test/e2e/templeton/deployers/env.sh hive/trunk/hcatalog/webhcat/svr/src/main/config/webhcat-default.xml hive/trunk/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/AppConfig.java hive/trunk/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/HiveDelegator.java hive/trunk/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/TempletonDelegator.java hive/trunk/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/JobSubmissionConstants.java hive/trunk/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/LaunchMapper.java hive/trunk/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/TrivialExecService.java Modified: hive/trunk/hcatalog/src/test/e2e/templeton/deployers/config/webhcat/webhcat-site.xml URL: http://svn.apache.org/viewvc/hive/trunk/hcatalog/src/test/e2e/templeton/deployers/config/webhcat/webhcat-site.xml?rev=1670162&r1=1670161&r2=1670162&view=diff ============================================================================== --- hive/trunk/hcatalog/src/test/e2e/templeton/deployers/config/webhcat/webhcat-site.xml (original) +++ hive/trunk/hcatalog/src/test/e2e/templeton/deployers/config/webhcat/webhcat-site.xml Mon Mar 30 18:10:29 2015 @@ -35,7 +35,7 @@ <property> <name>templeton.libjars</name> - <value>${env.TEMPLETON_HOME}/../lib/zookeeper-3.4.5.jar</value> + <value>${env.TEMPLETON_HOME}/../lib/zookeeper-3.4.6.jar,${env.TEMPLETON_HOME}/../lib/hive-common-1.2.0-SNAPSHOT.jar</value> <description>Jars to add to the classpath.</description> </property> @@ -69,6 +69,11 @@ shipped to the target node in the cluster to execute Pig job which uses HCat, Hive query, etc.</description> </property> + + <property> + <name>templeton.hive.extra.files</name> + 
<value>${env.TEZ_CLIENT_HOME}/conf/tez-site.xml,${env.TEZ_CLIENT_HOME}/,${env.TEZ_CLIENT_HOME}/lib</value> + </property> <property> <name>templeton.hcat.home</name> <value>apache-hive-${env.HIVE_VERSION}-bin.tar.gz/apache-hive-${env.HIVE_VERSION}-bin/hcatalog</value> @@ -101,7 +106,7 @@ </property> <property> - <!--\,thrift://127.0.0.1:9933--> + <!--\,thrift://127.0.0.1:9933,,hive.execution.engine=tez--> <name>templeton.hive.properties</name> <value>hive.metastore.uris=thrift://localhost:9933,hive.metastore.sasl.enabled=false</value> </property> Modified: hive/trunk/hcatalog/src/test/e2e/templeton/deployers/env.sh URL: http://svn.apache.org/viewvc/hive/trunk/hcatalog/src/test/e2e/templeton/deployers/env.sh?rev=1670162&r1=1670161&r2=1670162&view=diff ============================================================================== --- hive/trunk/hcatalog/src/test/e2e/templeton/deployers/env.sh (original) +++ hive/trunk/hcatalog/src/test/e2e/templeton/deployers/env.sh Mon Mar 30 18:10:29 2015 @@ -36,6 +36,10 @@ if [ -z ${PIG_VERSION} ]; then export PIG_VERSION=0.12.2-SNAPSHOT fi +if [ -z ${TEZ_VERSION} ]; then + export TEZ_VERSION=0.5.3 +fi + #Root of project source tree if [ -z ${PROJ_HOME} ]; then export PROJ_HOME=/Users/${USER}/dev/hive @@ -46,6 +50,7 @@ if [ -z ${HADOOP_HOME} ]; then export HADOOP_HOME=/Users/${USER}/dev/hwxhadoop/hadoop-dist/target/hadoop-${HADOOP_VERSION} fi +export TEZ_CLIENT_HOME=/Users/ekoifman/dev/apache-tez-client-${TEZ_VERSION} #Make sure Pig is built for the Hadoop version you are running export PIG_TAR_PATH=/Users/${USER}/dev/pig-${PIG_VERSION}-src/build #this is part of Pig distribution Modified: hive/trunk/hcatalog/webhcat/svr/src/main/config/webhcat-default.xml URL: http://svn.apache.org/viewvc/hive/trunk/hcatalog/webhcat/svr/src/main/config/webhcat-default.xml?rev=1670162&r1=1670161&r2=1670162&view=diff ============================================================================== --- 
hive/trunk/hcatalog/webhcat/svr/src/main/config/webhcat-default.xml (original) +++ hive/trunk/hcatalog/webhcat/svr/src/main/config/webhcat-default.xml Mon Mar 30 18:10:29 2015 @@ -39,7 +39,7 @@ <property> <name>templeton.libjars</name> - <value>${env.TEMPLETON_HOME}/share/webhcat/svr/lib/zookeeper-3.4.3.jar</value> + <value>${env.TEMPLETON_HOME}/../lib/zookeeper-3.4.6.jar,${env.TEMPLETON_HOME}/../lib/hive-common-1.2.0-SNAPSHOT.jar</value> <description>Jars to add to the classpath.</description> </property> @@ -106,7 +106,20 @@ <property> <name>templeton.hive.path</name> <value>hive-0.11.0.tar.gz/hive-0.11.0/bin/hive</value> - <description>The path to the Hive executable.</description> + <description>The path to the Hive executable. Applies only if templeton.hive.archive is defined.</description> + </property> + + <property> + <name>templeton.hive.extra.files</name> + <value>/tez-client/conf/tez-site.xml,/tez-client/,/tez-client/lib</value> + <description>The resources in this list will be localized to the node running LaunchMapper and added to HADOOP_CLASSPATH + before launching 'hive' command. If the path /foo/bar is a directory, the contents of the entire dir will be localized + and ./bar/* will be added to HADOOP_CLASSPATH. Note that since classpath path processing does not recurse into subdirectories, + the paths in this property may be overlapping. In the example above, "./tez-site.xml:./tez-client/*:./lib/*" will be added to + HADOOP_CLASSPATH. + This can be used to specify config files, Tez artifacts, etc. This list is passed as the -files option of the hadoop jar command, thus + each path is interpreted by the Generic Options Parser. It can be a local or an HDFS path.
+ </description> </property> <property> Modified: hive/trunk/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/AppConfig.java URL: http://svn.apache.org/viewvc/hive/trunk/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/AppConfig.java?rev=1670162&r1=1670161&r2=1670162&view=diff ============================================================================== --- hive/trunk/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/AppConfig.java (original) +++ hive/trunk/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/AppConfig.java Mon Mar 30 18:10:29 2015 @@ -154,6 +154,11 @@ public class AppConfig extends Configura public static final String HADOOP_MR_AM_JAVA_OPTS = "yarn.app.mapreduce.am.command-opts"; public static final String HADOOP_MR_AM_MEMORY_MB = "yarn.app.mapreduce.am.resource.mb"; public static final String UNIT_TEST_MODE = "templeton.unit.test.mode"; + /** + * comma-separated list of artifacts to add to HADOOP_CLASSPATH env var in + * LaunchMapper before launching Hive command + */ + public static final String HIVE_EXTRA_FILES = "templeton.hive.extra.files"; private static final Log LOG = LogFactory.getLog(AppConfig.class); Modified: hive/trunk/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/HiveDelegator.java URL: http://svn.apache.org/viewvc/hive/trunk/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/HiveDelegator.java?rev=1670162&r1=1670161&r2=1670162&view=diff ============================================================================== --- hive/trunk/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/HiveDelegator.java (original) +++ hive/trunk/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/HiveDelegator.java Mon Mar 30 18:10:29 2015 @@ -27,6 +27,7 @@ import java.util.List; import java.util.Map; import org.apache.commons.exec.ExecuteException; +import org.apache.hadoop.fs.Path; import 
org.apache.hive.hcatalog.templeton.tool.JobSubmissionConstants; import org.apache.hive.hcatalog.templeton.tool.TempletonControllerJob; import org.apache.hive.hcatalog.templeton.tool.TempletonUtils; @@ -117,7 +118,7 @@ public class HiveDelegator extends Launc private List<String> makeBasicArgs(String execute, String srcFile, String otherFiles, String statusdir, String completedUrl, boolean enablelog) - throws URISyntaxException, FileNotFoundException, IOException, + throws URISyntaxException, IOException, InterruptedException { ArrayList<String> args = new ArrayList<String>(); @@ -142,6 +143,30 @@ public class HiveDelegator extends Launc args.add(appConf.hiveArchive()); } + //ship additional artifacts, for example for Tez + String extras = appConf.get(AppConfig.HIVE_EXTRA_FILES); + if(extras != null && extras.length() > 0) { + boolean foundFiles = false; + for(int i = 0; i < args.size(); i++) { + if(FILES.equals(args.get(i))) { + String value = args.get(i + 1); + args.set(i + 1, value + "," + extras); + foundFiles = true; + } + } + if(!foundFiles) { + args.add(FILES); + args.add(extras); + } + String[] extraFiles = appConf.getStrings(AppConfig.HIVE_EXTRA_FILES); + StringBuilder extraFileNames = new StringBuilder(); + //now tell LaunchMapper which files it should add to HADOOP_CLASSPATH + for(String file : extraFiles) { + Path p = new Path(file); + extraFileNames.append(p.getName()).append(","); + } + addDef(args, JobSubmissionConstants.HADOOP_CLASSPATH_EXTRAS, extraFileNames.toString()); + } return args; } } Modified: hive/trunk/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/TempletonDelegator.java URL: http://svn.apache.org/viewvc/hive/trunk/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/TempletonDelegator.java?rev=1670162&r1=1670161&r2=1670162&view=diff ============================================================================== --- 
hive/trunk/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/TempletonDelegator.java (original) +++ hive/trunk/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/TempletonDelegator.java Mon Mar 30 18:10:29 2015 @@ -28,6 +28,10 @@ public class TempletonDelegator { * http://hadoop.apache.org/docs/r1.0.4/commands_manual.html#Generic+Options */ public static final String ARCHIVES = "-archives"; + /** + * http://hadoop.apache.org/docs/current/hadoop-project-dist/hadoop-common/CommandsManual.html#Generic_Options + */ + public static final String FILES = "-files"; protected AppConfig appConf; Modified: hive/trunk/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/JobSubmissionConstants.java URL: http://svn.apache.org/viewvc/hive/trunk/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/JobSubmissionConstants.java?rev=1670162&r1=1670161&r2=1670162&view=diff ============================================================================== --- hive/trunk/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/JobSubmissionConstants.java (original) +++ hive/trunk/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/JobSubmissionConstants.java Mon Mar 30 18:10:29 2015 @@ -31,6 +31,12 @@ public interface JobSubmissionConstants public static final String EXIT_FNAME = "exit"; public static final int WATCHER_TIMEOUT_SECS = 10; public static final int KEEP_ALIVE_MSEC = 60 * 1000; + /** + * A comma-separated list of files to be added to HADOOP_CLASSPATH in + * {@link org.apache.hive.hcatalog.templeton.tool.LaunchMapper}. Used to localize additional + * artifacts for job submission requests. 
+ */ + public static final String HADOOP_CLASSPATH_EXTRAS = "templeton.hadoop.classpath.extras"; /* * The = sign in the string for TOKEN_FILE_ARG_PLACEHOLDER is required because * org.apache.hadoop.util.GenericOptionsParser.preProcessForWindows() prepares Modified: hive/trunk/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/LaunchMapper.java URL: http://svn.apache.org/viewvc/hive/trunk/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/LaunchMapper.java?rev=1670162&r1=1670161&r2=1670162&view=diff ============================================================================== --- hive/trunk/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/LaunchMapper.java (original) +++ hive/trunk/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/LaunchMapper.java Mon Mar 30 18:10:29 2015 @@ -21,6 +21,7 @@ package org.apache.hive.hcatalog.templet import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.common.classification.InterfaceAudience; @@ -33,7 +34,6 @@ import org.apache.hadoop.mapreduce.Mappe import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.util.Shell; import org.apache.hadoop.util.StringUtils; -import org.apache.hive.hcatalog.templeton.AppConfig; import org.apache.hive.hcatalog.templeton.BadParam; import org.apache.hive.hcatalog.templeton.LauncherDelegator; @@ -115,6 +115,32 @@ public class LaunchMapper extends Mapper } } } + private static void handleHadoopClasspathExtras(Configuration conf, Map<String, String> env) + throws IOException { + if(!TempletonUtils.isset(conf.get(JobSubmissionConstants.HADOOP_CLASSPATH_EXTRAS))) { + return; + } + LOG.debug(HADOOP_CLASSPATH_EXTRAS + "=" + conf.get(HADOOP_CLASSPATH_EXTRAS)); + String[] 
files = conf.getStrings(HADOOP_CLASSPATH_EXTRAS); + StringBuilder paths = new StringBuilder(); + FileSystem fs = FileSystem.getLocal(conf);//these have been localized already + for(String f : files) { + Path p = new Path(f); + FileStatus fileStatus = fs.getFileStatus(p); + paths.append(f); + if(fileStatus.isDirectory()) { + paths.append(File.separator).append("*"); + } + paths.append(File.pathSeparator); + } + paths.setLength(paths.length() - 1); + if(TempletonUtils.isset(System.getenv("HADOOP_CLASSPATH"))) { + env.put("HADOOP_CLASSPATH", System.getenv("HADOOP_CLASSPATH") + File.pathSeparator + paths); + } + else { + env.put("HADOOP_CLASSPATH", paths.toString()); + } + } protected Process startJob(Context context, String user, String overrideClasspath) throws IOException, InterruptedException { Configuration conf = context.getConfiguration(); @@ -135,6 +161,7 @@ public class LaunchMapper extends Mapper Map<String, String> env = TempletonUtils.hadoopUserEnv(user, overrideClasspath); handlePigEnvVars(conf, env); handleSqoop(conf, env); + handleHadoopClasspathExtras(conf, env); List<String> jarArgsList = new LinkedList<String>(Arrays.asList(jarArgs)); handleTokenFile(jarArgsList, JobSubmissionConstants.TOKEN_FILE_ARG_PLACEHOLDER, "mapreduce.job.credentials.binary"); handleTokenFile(jarArgsList, JobSubmissionConstants.TOKEN_FILE_ARG_PLACEHOLDER_TEZ, "tez.credentials.path"); Modified: hive/trunk/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/TrivialExecService.java URL: http://svn.apache.org/viewvc/hive/trunk/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/TrivialExecService.java?rev=1670162&r1=1670161&r2=1670162&view=diff ============================================================================== --- hive/trunk/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/TrivialExecService.java (original) +++ 
hive/trunk/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/TrivialExecService.java Mon Mar 30 18:10:29 2015 @@ -76,32 +76,31 @@ final class TrivialExecService { } } /** - * Print files and directories in current directory. Will list files in the sub-directory (only 1 level deep) - * time honored tradition in WebHCat of borrowing from Oozie + * Print files and directories in current {@code dir}. */ - private static void printContentsOfDir(String dir) { + private static StringBuilder printContentsOfDir(String dir, int depth, StringBuilder sb) { + StringBuilder indent = new StringBuilder(); + for(int i = 0; i < depth; i++) { + indent.append("--"); + } File folder = new File(dir); - StringBuilder sb = new StringBuilder("Files in '").append(dir).append("' dir:").append(folder.getAbsolutePath()).append('\n'); + sb.append(indent).append("Files in '").append(dir).append("' dir:").append(folder.getAbsolutePath()).append('\n'); File[] listOfFiles = folder.listFiles(); + if(listOfFiles == null) { + return sb; + } for (File fileName : listOfFiles) { if (fileName.isFile()) { - sb.append("File: ").append(fileName.getName()).append('\n'); + sb.append(indent).append("File: ").append(fileName.getName()).append('\n'); } else if (fileName.isDirectory()) { - sb.append("Dir: ").append(fileName.getName()).append('\n'); - File subDir = new File(fileName.getName()); - File[] moreFiles = subDir.listFiles(); - for (File subFileName : moreFiles) { - if (subFileName.isFile()) { - sb.append("--File: ").append(subFileName.getName()).append('\n'); - } - else if (subFileName.isDirectory()) { - sb.append("--Dir: ").append(subFileName.getName()).append('\n'); - } - } + printContentsOfDir(fileName.getName(), depth+1, sb); } } - LOG.info(sb.toString()); + return sb; + } + private static void printContentsOfDir(String dir) { + LOG.info(printContentsOfDir(dir, 0, new StringBuilder()).toString()); } }