Author: ekoifman
Date: Mon Mar 30 18:10:29 2015
New Revision: 1670162

URL: http://svn.apache.org/r1670162
Log:
HIVE-10066 Hive on Tez job submission through WebHCat doesn't ship Tez 
artifacts (Eugene Koifman, reviewed by Thejas Nair)

Modified:
    
hive/trunk/hcatalog/src/test/e2e/templeton/deployers/config/webhcat/webhcat-site.xml
    hive/trunk/hcatalog/src/test/e2e/templeton/deployers/env.sh
    hive/trunk/hcatalog/webhcat/svr/src/main/config/webhcat-default.xml
    
hive/trunk/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/AppConfig.java
    
hive/trunk/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/HiveDelegator.java
    
hive/trunk/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/TempletonDelegator.java
    
hive/trunk/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/JobSubmissionConstants.java
    
hive/trunk/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/LaunchMapper.java
    
hive/trunk/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/TrivialExecService.java

Modified: 
hive/trunk/hcatalog/src/test/e2e/templeton/deployers/config/webhcat/webhcat-site.xml
URL: 
http://svn.apache.org/viewvc/hive/trunk/hcatalog/src/test/e2e/templeton/deployers/config/webhcat/webhcat-site.xml?rev=1670162&r1=1670161&r2=1670162&view=diff
==============================================================================
--- 
hive/trunk/hcatalog/src/test/e2e/templeton/deployers/config/webhcat/webhcat-site.xml
 (original)
+++ 
hive/trunk/hcatalog/src/test/e2e/templeton/deployers/config/webhcat/webhcat-site.xml
 Mon Mar 30 18:10:29 2015
@@ -35,7 +35,7 @@
 
     <property>
         <name>templeton.libjars</name>
-        <value>${env.TEMPLETON_HOME}/../lib/zookeeper-3.4.5.jar</value>
+        
<value>${env.TEMPLETON_HOME}/../lib/zookeeper-3.4.6.jar,${env.TEMPLETON_HOME}/../lib/hive-common-1.2.0-SNAPSHOT.jar</value>
         <description>Jars to add to the classpath.</description>
     </property>
 
@@ -69,6 +69,11 @@
             shipped to the target node in the cluster to execute Pig job which 
uses 
             HCat, Hive query, etc.</description>
     </property>
+
+    <property>
+      <name>templeton.hive.extra.files</name>
+      
<value>${env.TEZ_CLIENT_HOME}/conf/tez-site.xml,${env.TEZ_CLIENT_HOME}/,${env.TEZ_CLIENT_HOME}/lib</value>
+    </property>
     <property>
         <name>templeton.hcat.home</name>
         
<value>apache-hive-${env.HIVE_VERSION}-bin.tar.gz/apache-hive-${env.HIVE_VERSION}-bin/hcatalog</value>
@@ -101,7 +106,7 @@
     </property>
 
     <property>
-        <!--\,thrift://127.0.0.1:9933-->
+        <!--\,thrift://127.0.0.1:9933,,hive.execution.engine=tez-->
         <name>templeton.hive.properties</name>
         
<value>hive.metastore.uris=thrift://localhost:9933,hive.metastore.sasl.enabled=false</value>
     </property>

Modified: hive/trunk/hcatalog/src/test/e2e/templeton/deployers/env.sh
URL: 
http://svn.apache.org/viewvc/hive/trunk/hcatalog/src/test/e2e/templeton/deployers/env.sh?rev=1670162&r1=1670161&r2=1670162&view=diff
==============================================================================
--- hive/trunk/hcatalog/src/test/e2e/templeton/deployers/env.sh (original)
+++ hive/trunk/hcatalog/src/test/e2e/templeton/deployers/env.sh Mon Mar 30 
18:10:29 2015
@@ -36,6 +36,10 @@ if [ -z ${PIG_VERSION} ]; then
   export PIG_VERSION=0.12.2-SNAPSHOT
 fi
 
+if [ -z ${TEZ_VERSION} ]; then
+  export TEZ_VERSION=0.5.3
+fi
+
 #Root of project source tree
 if [ -z ${PROJ_HOME} ]; then
   export PROJ_HOME=/Users/${USER}/dev/hive
@@ -46,6 +50,7 @@ if [ -z ${HADOOP_HOME} ]; then
   export 
HADOOP_HOME=/Users/${USER}/dev/hwxhadoop/hadoop-dist/target/hadoop-${HADOOP_VERSION}
 fi
 
+export TEZ_CLIENT_HOME=/Users/ekoifman/dev/apache-tez-client-${TEZ_VERSION}
 #Make sure Pig is built for the Hadoop version you are running
 export PIG_TAR_PATH=/Users/${USER}/dev/pig-${PIG_VERSION}-src/build
 #this is part of Pig distribution

Modified: hive/trunk/hcatalog/webhcat/svr/src/main/config/webhcat-default.xml
URL: 
http://svn.apache.org/viewvc/hive/trunk/hcatalog/webhcat/svr/src/main/config/webhcat-default.xml?rev=1670162&r1=1670161&r2=1670162&view=diff
==============================================================================
--- hive/trunk/hcatalog/webhcat/svr/src/main/config/webhcat-default.xml 
(original)
+++ hive/trunk/hcatalog/webhcat/svr/src/main/config/webhcat-default.xml Mon Mar 
30 18:10:29 2015
@@ -39,7 +39,7 @@
 
   <property>
     <name>templeton.libjars</name>
-    
<value>${env.TEMPLETON_HOME}/share/webhcat/svr/lib/zookeeper-3.4.3.jar</value>
+    
<value>${env.TEMPLETON_HOME}/../lib/zookeeper-3.4.6.jar,${env.TEMPLETON_HOME}/../lib/hive-common-1.2.0-SNAPSHOT.jar</value>
     <description>Jars to add to the classpath.</description>
   </property>
 
@@ -106,7 +106,20 @@
   <property>
     <name>templeton.hive.path</name>
     <value>hive-0.11.0.tar.gz/hive-0.11.0/bin/hive</value>
-    <description>The path to the Hive executable.</description>
+    <description>The path to the Hive executable.  Applies only if 
templeton.hive.archive is defined.</description>
+  </property>
+
+  <property>
+    <name>templeton.hive.extra.files</name>
+    <value>/tez-client/conf/tez-site.xml,/tez-client/,/tez-client/lib</value>
+    <description>The resources in this list will be localized to the node 
running LaunchMapper and added to HADOOP_CLASSPATH
+      before launching 'hive' command.  If the path /foo/bar is a directory, 
the contents of the entire dir will be localized
+      and ./bar/* will be added to HADOOP_CLASSPATH.  Note that since 
classpath path processing does not recurse into subdirectories,
+      the paths in this property may be overlapping.  In the example above, 
"./tez-site.xml:./tez-client/*:./lib/*" will be added to
+      HADOOP_CLASSPATH.
+      This can be used to specify config files, Tez artifacts, etc.  This will 
be sent as the -files option of the hadoop jar command, thus
+      each path is interpreted by Generic Option Parser.  It can be local or 
hdfs path.
+    </description>
   </property>
 
   <property>

Modified: 
hive/trunk/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/AppConfig.java
URL: 
http://svn.apache.org/viewvc/hive/trunk/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/AppConfig.java?rev=1670162&r1=1670161&r2=1670162&view=diff
==============================================================================
--- 
hive/trunk/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/AppConfig.java
 (original)
+++ 
hive/trunk/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/AppConfig.java
 Mon Mar 30 18:10:29 2015
@@ -154,6 +154,11 @@ public class AppConfig extends Configura
   public static final String HADOOP_MR_AM_JAVA_OPTS = 
"yarn.app.mapreduce.am.command-opts";
   public static final String HADOOP_MR_AM_MEMORY_MB = 
"yarn.app.mapreduce.am.resource.mb";
   public static final String UNIT_TEST_MODE     = "templeton.unit.test.mode";
+  /**
+   * comma-separated list of artifacts to add to HADOOP_CLASSPATH env var in
+   * LaunchMapper before launching Hive command
+   */
+  public static final String HIVE_EXTRA_FILES = "templeton.hive.extra.files";
 
 
   private static final Log LOG = LogFactory.getLog(AppConfig.class);

Modified: 
hive/trunk/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/HiveDelegator.java
URL: 
http://svn.apache.org/viewvc/hive/trunk/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/HiveDelegator.java?rev=1670162&r1=1670161&r2=1670162&view=diff
==============================================================================
--- 
hive/trunk/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/HiveDelegator.java
 (original)
+++ 
hive/trunk/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/HiveDelegator.java
 Mon Mar 30 18:10:29 2015
@@ -27,6 +27,7 @@ import java.util.List;
 import java.util.Map;
 
 import org.apache.commons.exec.ExecuteException;
+import org.apache.hadoop.fs.Path;
 import org.apache.hive.hcatalog.templeton.tool.JobSubmissionConstants;
 import org.apache.hive.hcatalog.templeton.tool.TempletonControllerJob;
 import org.apache.hive.hcatalog.templeton.tool.TempletonUtils;
@@ -117,7 +118,7 @@ public class HiveDelegator extends Launc
   private List<String> makeBasicArgs(String execute, String srcFile, String 
otherFiles,
                                          String statusdir, String completedUrl,
                                          boolean enablelog)
-    throws URISyntaxException, FileNotFoundException, IOException,
+    throws URISyntaxException, IOException,
     InterruptedException
   {
     ArrayList<String> args = new ArrayList<String>();
@@ -142,6 +143,30 @@ public class HiveDelegator extends Launc
       args.add(appConf.hiveArchive());
     }
 
+    //ship additional artifacts, for example for Tez
+    String extras = appConf.get(AppConfig.HIVE_EXTRA_FILES); 
+    if(extras != null && extras.length() > 0) {
+      boolean foundFiles = false;
+      for(int i = 0; i < args.size(); i++) {
+        if(FILES.equals(args.get(i))) {
+          String value = args.get(i + 1);
+          args.set(i + 1, value + "," + extras);
+          foundFiles = true;
+        }
+      }
+      if(!foundFiles) {
+        args.add(FILES);
+        args.add(extras);
+      }
+      String[] extraFiles = appConf.getStrings(AppConfig.HIVE_EXTRA_FILES);
+      StringBuilder extraFileNames = new StringBuilder();
+      //now tell LaunchMapper which files it should add to HADOOP_CLASSPATH
+      for(String file : extraFiles) {
+        Path p = new Path(file);
+        extraFileNames.append(p.getName()).append(",");
+      }
+      addDef(args, JobSubmissionConstants.HADOOP_CLASSPATH_EXTRAS, 
extraFileNames.toString());
+    }
     return args;
   }
 }

Modified: 
hive/trunk/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/TempletonDelegator.java
URL: 
http://svn.apache.org/viewvc/hive/trunk/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/TempletonDelegator.java?rev=1670162&r1=1670161&r2=1670162&view=diff
==============================================================================
--- 
hive/trunk/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/TempletonDelegator.java
 (original)
+++ 
hive/trunk/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/TempletonDelegator.java
 Mon Mar 30 18:10:29 2015
@@ -28,6 +28,10 @@ public class TempletonDelegator {
    * http://hadoop.apache.org/docs/r1.0.4/commands_manual.html#Generic+Options
    */
   public static final String ARCHIVES = "-archives";
+  /**
+   * 
http://hadoop.apache.org/docs/current/hadoop-project-dist/hadoop-common/CommandsManual.html#Generic_Options
+   */
+  public static final String FILES = "-files";
   
   protected AppConfig appConf;
 

Modified: 
hive/trunk/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/JobSubmissionConstants.java
URL: 
http://svn.apache.org/viewvc/hive/trunk/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/JobSubmissionConstants.java?rev=1670162&r1=1670161&r2=1670162&view=diff
==============================================================================
--- 
hive/trunk/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/JobSubmissionConstants.java
 (original)
+++ 
hive/trunk/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/JobSubmissionConstants.java
 Mon Mar 30 18:10:29 2015
@@ -31,6 +31,12 @@ public interface JobSubmissionConstants
   public static final String EXIT_FNAME = "exit";
   public static final int WATCHER_TIMEOUT_SECS = 10;
   public static final int KEEP_ALIVE_MSEC = 60 * 1000;
+  /**
+   * A comma-separated list of files to be added to HADOOP_CLASSPATH in 
+   * {@link org.apache.hive.hcatalog.templeton.tool.LaunchMapper}.  Used to 
localize additional
+   * artifacts for job submission requests.
+   */
+  public static final String HADOOP_CLASSPATH_EXTRAS = 
"templeton.hadoop.classpath.extras";
   /*
    * The = sign in the string for TOKEN_FILE_ARG_PLACEHOLDER is required 
because
    * org.apache.hadoop.util.GenericOptionsParser.preProcessForWindows() 
prepares

Modified: 
hive/trunk/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/LaunchMapper.java
URL: 
http://svn.apache.org/viewvc/hive/trunk/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/LaunchMapper.java?rev=1670162&r1=1670161&r2=1670162&view=diff
==============================================================================
--- 
hive/trunk/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/LaunchMapper.java
 (original)
+++ 
hive/trunk/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/LaunchMapper.java
 Mon Mar 30 18:10:29 2015
@@ -21,6 +21,7 @@ package org.apache.hive.hcatalog.templet
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hive.common.classification.InterfaceAudience;
@@ -33,7 +34,6 @@ import org.apache.hadoop.mapreduce.Mappe
 import org.apache.hadoop.security.UserGroupInformation;
 import org.apache.hadoop.util.Shell;
 import org.apache.hadoop.util.StringUtils;
-import org.apache.hive.hcatalog.templeton.AppConfig;
 import org.apache.hive.hcatalog.templeton.BadParam;
 import org.apache.hive.hcatalog.templeton.LauncherDelegator;
 
@@ -115,6 +115,32 @@ public class LaunchMapper extends Mapper
       }
     }
   }
+  private static void handleHadoopClasspathExtras(Configuration conf, 
Map<String, String> env)
+    throws IOException {
+    
if(!TempletonUtils.isset(conf.get(JobSubmissionConstants.HADOOP_CLASSPATH_EXTRAS)))
 {
+      return;
+    }
+    LOG.debug(HADOOP_CLASSPATH_EXTRAS + "=" + 
conf.get(HADOOP_CLASSPATH_EXTRAS));
+    String[] files = conf.getStrings(HADOOP_CLASSPATH_EXTRAS);
+    StringBuilder paths = new StringBuilder();
+    FileSystem fs = FileSystem.getLocal(conf);//these have been localized 
already
+    for(String f : files) {
+      Path p = new Path(f);
+      FileStatus fileStatus = fs.getFileStatus(p);
+      paths.append(f);
+      if(fileStatus.isDirectory()) {
+        paths.append(File.separator).append("*");
+      }
+      paths.append(File.pathSeparator);
+    }
+    paths.setLength(paths.length() - 1);
+    if(TempletonUtils.isset(System.getenv("HADOOP_CLASSPATH"))) {
+      env.put("HADOOP_CLASSPATH", System.getenv("HADOOP_CLASSPATH") + 
File.pathSeparator + paths);
+    }
+    else {
+      env.put("HADOOP_CLASSPATH", paths.toString());
+    }
+  }
   protected Process startJob(Context context, String user, String 
overrideClasspath)
     throws IOException, InterruptedException {
     Configuration conf = context.getConfiguration();
@@ -135,6 +161,7 @@ public class LaunchMapper extends Mapper
     Map<String, String> env = TempletonUtils.hadoopUserEnv(user, 
overrideClasspath);
     handlePigEnvVars(conf, env);
     handleSqoop(conf, env);
+    handleHadoopClasspathExtras(conf, env);    
     List<String> jarArgsList = new LinkedList<String>(Arrays.asList(jarArgs));
     handleTokenFile(jarArgsList, 
JobSubmissionConstants.TOKEN_FILE_ARG_PLACEHOLDER, 
"mapreduce.job.credentials.binary");
     handleTokenFile(jarArgsList, 
JobSubmissionConstants.TOKEN_FILE_ARG_PLACEHOLDER_TEZ, "tez.credentials.path");

Modified: 
hive/trunk/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/TrivialExecService.java
URL: 
http://svn.apache.org/viewvc/hive/trunk/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/TrivialExecService.java?rev=1670162&r1=1670161&r2=1670162&view=diff
==============================================================================
--- 
hive/trunk/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/TrivialExecService.java
 (original)
+++ 
hive/trunk/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/TrivialExecService.java
 Mon Mar 30 18:10:29 2015
@@ -76,32 +76,31 @@ final class TrivialExecService {
     }
   }
   /**
-   * Print files and directories in current directory. Will list files in the 
sub-directory (only 1 level deep)
-   * time honored tradition in WebHCat of borrowing from Oozie
+   * Print files and directories in current {@code dir}.
    */
-  private static void printContentsOfDir(String dir) {
+  private static StringBuilder printContentsOfDir(String dir, int depth, 
StringBuilder sb) {
+    StringBuilder indent = new StringBuilder();
+    for(int i = 0; i < depth; i++) {
+      indent.append("--");
+    }
     File folder = new File(dir);
-    StringBuilder sb = new StringBuilder("Files in '").append(dir).append("' 
dir:").append(folder.getAbsolutePath()).append('\n');
+    sb.append(indent).append("Files in '").append(dir).append("' 
dir:").append(folder.getAbsolutePath()).append('\n');
 
     File[] listOfFiles = folder.listFiles();
+    if(listOfFiles == null) {
+      return sb;
+    }
     for (File fileName : listOfFiles) {
       if (fileName.isFile()) {
-        sb.append("File: ").append(fileName.getName()).append('\n');
+        sb.append(indent).append("File: 
").append(fileName.getName()).append('\n');
       }
       else if (fileName.isDirectory()) {
-        sb.append("Dir: ").append(fileName.getName()).append('\n');
-        File subDir = new File(fileName.getName());
-        File[] moreFiles = subDir.listFiles();
-        for (File subFileName : moreFiles) {
-          if (subFileName.isFile()) {
-            sb.append("--File: ").append(subFileName.getName()).append('\n');
-          }
-          else if (subFileName.isDirectory()) {
-            sb.append("--Dir: ").append(subFileName.getName()).append('\n');
-          }
-        }
+        printContentsOfDir(fileName.getName(), depth+1, sb);
       }
     }
-    LOG.info(sb.toString());
+    return sb;
+  }
+  private static void printContentsOfDir(String dir) {
+    LOG.info(printContentsOfDir(dir, 0, new StringBuilder()).toString());    
   }
 }


Reply via email to