Author: tucu
Date: Wed Feb 22 19:52:30 2012
New Revision: 1292480

URL: http://svn.apache.org/viewvc?rev=1292480&view=rev
Log:
OOZIE-703 Improve/Consolidate Hadoop job ID log harvesting logic (tucu)

Modified:
    incubator/oozie/trunk/core/src/main/java/org/apache/oozie/action/hadoop/HiveMain.java
    incubator/oozie/trunk/core/src/main/java/org/apache/oozie/action/hadoop/LauncherMain.java
    incubator/oozie/trunk/core/src/main/java/org/apache/oozie/action/hadoop/PigMain.java
    incubator/oozie/trunk/core/src/main/java/org/apache/oozie/action/hadoop/PigMainWithOldAPI.java
    incubator/oozie/trunk/core/src/main/java/org/apache/oozie/action/hadoop/SqoopMain.java
    incubator/oozie/trunk/core/src/test/java/org/apache/oozie/action/hadoop/TestHiveActionExecutor.java
    incubator/oozie/trunk/core/src/test/java/org/apache/oozie/action/hadoop/TestHiveMain.java
    incubator/oozie/trunk/core/src/test/java/org/apache/oozie/action/hadoop/TestSqoopActionExecutor.java
    incubator/oozie/trunk/release-log.txt

Modified: incubator/oozie/trunk/core/src/main/java/org/apache/oozie/action/hadoop/HiveMain.java
URL: http://svn.apache.org/viewvc/incubator/oozie/trunk/core/src/main/java/org/apache/oozie/action/hadoop/HiveMain.java?rev=1292480&r1=1292479&r2=1292480&view=diff
==============================================================================
--- incubator/oozie/trunk/core/src/main/java/org/apache/oozie/action/hadoop/HiveMain.java (original)
+++ incubator/oozie/trunk/core/src/main/java/org/apache/oozie/action/hadoop/HiveMain.java Wed Feb 22 19:52:30 2012
@@ -32,6 +32,7 @@ import java.util.List;
 import java.util.Map;
 import java.util.Map.Entry;
 import java.util.Properties;
+import java.util.regex.Pattern;
 
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.Path;
@@ -40,6 +41,10 @@ import org.apache.hadoop.hive.cli.CliDri
 public class HiveMain extends LauncherMain {
     public static final String USER_HIVE_DEFAULT_FILE = 
"oozie-user-hive-default.xml";
 
+    private static final Pattern[] HIVE_JOB_IDS_PATTERNS = {
+      Pattern.compile("Ended Job = (job_\\S*)")
+    };
+
     public static final String HIVE_L4J_PROPS = "hive-log4j.properties";
     public static final String HIVE_EXEC_L4J_PROPS = 
"hive-exec-log4j.properties";
     public static final String HIVE_SITE_CONF = "hive-site.xml";
@@ -288,12 +293,12 @@ public class HiveMain extends LauncherMa
         System.out.println("\n<<< Invocation of Hive command completed <<<\n");
 
         // harvesting and recording Hadoop Job IDs
-        Properties jobIds = getHadoopJobIds(logFile, JOB_ID_LOG_PREFIX);
+        Properties jobIds = getHadoopJobIds(logFile, HIVE_JOB_IDS_PATTERNS);
         File file = new 
File(System.getProperty("oozie.action.output.properties"));
         OutputStream os = new FileOutputStream(file);
         jobIds.store(os, "");
         os.close();
-        System.out.println(" Hadoop Job IDs executed by Hive: " + 
jobIds.getProperty("hadoopJobs"));
+        System.out.println(" Hadoop Job IDs executed by Hive: " + 
jobIds.getProperty(HADOOP_JOBS));
         System.out.println();
     }
 
@@ -347,37 +352,4 @@ public class HiveMain extends LauncherMa
         return expr;
     }
 
-    //TODO: Hive should provide a programmatic way of spitting out Hadoop jobs
-    private static final String JOB_ID_LOG_PREFIX = "Ended Job = ";
-
-    public static Properties getHadoopJobIds(String logFile, String prefix) 
throws IOException {
-        Properties props = new Properties();
-        StringBuffer sb = new StringBuffer(100);
-        if (!new File(logFile).exists()) {
-            System.err.println("hive log file: " + logFile + "  not present. 
Therefore no Hadoop jobids found");
-            props.setProperty("hadoopJobs", "");
-        }
-        else {
-            BufferedReader br = new BufferedReader(new FileReader(logFile));
-            String line = br.readLine();
-            String separator = "";
-            while (line != null) {
-                if (line.contains(prefix)) {
-                    int jobIdStarts = line.indexOf(prefix) + prefix.length();
-                    String jobId = line.substring(jobIdStarts).trim();
-
-                    //Doing this because Hive now does things like 
ConditionalTask which are not Hadoop jobs.
-                    if (jobId.startsWith("job_")) {
-                        sb.append(separator).append(jobId);
-                        separator = ",";
-                    }
-                }
-                line = br.readLine();
-            }
-            br.close();
-            props.setProperty("hadoopJobs", sb.toString());
-        }
-        return props;
-    }
-
 }

Modified: incubator/oozie/trunk/core/src/main/java/org/apache/oozie/action/hadoop/LauncherMain.java
URL: http://svn.apache.org/viewvc/incubator/oozie/trunk/core/src/main/java/org/apache/oozie/action/hadoop/LauncherMain.java?rev=1292480&r1=1292479&r2=1292480&view=diff
==============================================================================
--- incubator/oozie/trunk/core/src/main/java/org/apache/oozie/action/hadoop/LauncherMain.java (original)
+++ incubator/oozie/trunk/core/src/main/java/org/apache/oozie/action/hadoop/LauncherMain.java Wed Feb 22 19:52:30 2012
@@ -17,18 +17,54 @@
  */
 package org.apache.oozie.action.hadoop;
 
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileReader;
 import java.io.IOException;
 import java.io.StringWriter;
 import java.util.Collection;
 import java.util.Map;
+import java.util.Properties;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
 
 public abstract class LauncherMain {
 
+    public static final String HADOOP_JOBS = "hadoopJobs";
+
     protected static void run(Class<? extends LauncherMain> klass, String[] 
args) throws Exception {
         LauncherMain main = klass.newInstance();
         main.run(args);
     }
 
+    public static Properties getHadoopJobIds(String logFile, Pattern[] 
patterns) throws IOException {
+        Properties props = new Properties();
+        StringBuffer sb = new StringBuffer(100);
+        if (!new File(logFile).exists()) {
+            System.err.println("Log file: " + logFile + "  not present. 
Therefore no Hadoop jobids found");
+            props.setProperty(HADOOP_JOBS, "");
+        }
+        else {
+            BufferedReader br = new BufferedReader(new FileReader(logFile));
+            String line = br.readLine();
+            String separator = "";
+            while (line != null) {
+                for (Pattern pattern : patterns) {
+                    Matcher matcher = pattern.matcher(line);
+                    if (matcher.find()) {
+                        String jobId = matcher.group(1);
+                        sb.append(separator).append(jobId);
+                        separator = ",";
+                    }
+                }
+                line = br.readLine();
+            }
+            br.close();
+            props.setProperty(HADOOP_JOBS, sb.toString());
+        }
+        return props;
+    }
+
     protected abstract void run(String[] args) throws Exception;
 
     /**

Modified: incubator/oozie/trunk/core/src/main/java/org/apache/oozie/action/hadoop/PigMain.java
URL: http://svn.apache.org/viewvc/incubator/oozie/trunk/core/src/main/java/org/apache/oozie/action/hadoop/PigMain.java?rev=1292480&r1=1292479&r2=1292480&view=diff
==============================================================================
--- incubator/oozie/trunk/core/src/main/java/org/apache/oozie/action/hadoop/PigMain.java (original)
+++ incubator/oozie/trunk/core/src/main/java/org/apache/oozie/action/hadoop/PigMain.java Wed Feb 22 19:52:30 2012
@@ -41,6 +41,7 @@ import java.util.ArrayList;
 import java.util.Properties;
 import java.util.Set;
 import java.net.URL;
+import java.util.regex.Pattern;
 
 public class PigMain extends LauncherMain {
     private static final Set<String> DISALLOWED_PIG_OPTIONS = new 
HashSet<String>();
@@ -50,6 +51,10 @@ public class PigMain extends LauncherMai
     public static final String EXTERNAL_STATS_WRITE = ACTION_PREFIX + 
"external.stats.write";
     public static final int STRING_BUFFER_SIZE = 100;
 
+    private static final Pattern[] PIG_JOB_IDS_PATTERNS = {
+      Pattern.compile("HadoopJobId: (job_\\S*)")
+    };
+
     static {
         DISALLOWED_PIG_OPTIONS.add("-4");
         DISALLOWED_PIG_OPTIONS.add("-log4jconf");
@@ -224,9 +229,9 @@ public class PigMain extends LauncherMai
         // So retrieving hadoop Ids here
         File file = new File(System.getProperty(EXTERNAL_CHILD_IDS));
         if (!file.exists()) {
-            String jobIds = getHadoopJobIds(logFile);
-            writeExternalData(jobIds, file);
-            System.out.println(" Hadoop Job IDs executed by Pig: " + jobIds);
+            Properties props = getHadoopJobIds(logFile, PIG_JOB_IDS_PATTERNS);
+            writeExternalData(props.getProperty(HADOOP_JOBS), file);
+            System.out.println(" Hadoop Job IDs executed by Pig: " + 
props.getProperty(HADOOP_JOBS));
             System.out.println();
         }
     }
@@ -355,43 +360,6 @@ public class PigMain extends LauncherMai
         MapReduceMain.setStrings(conf, "oozie.pig.args", args);
     }
 
-    private static final String JOB_ID_LOG_PREFIX = "HadoopJobId: ";
-
-    /**
-     * Get Hadoop Ids by parsing the log file
-     *
-     * @param logFile the pig log file
-     * @return comma-separated String
-     */
-    protected String getHadoopJobIds(String logFile) throws IOException {
-        StringBuilder sb = new StringBuilder(STRING_BUFFER_SIZE);
-        if (new File(logFile).exists() == false) {
-            System.err.println("pig log file: " + logFile + "  not present. 
Therefore no Hadoop jobids found");
-            return sb.toString();
-        }
-        BufferedReader br = new BufferedReader(new FileReader(logFile));
-        String line = br.readLine();
-        String separator = ",";
-        while (line != null) {
-            if (line.contains(JOB_ID_LOG_PREFIX)) {
-                int jobIdStarts = line.indexOf(JOB_ID_LOG_PREFIX) + 
JOB_ID_LOG_PREFIX.length();
-                String jobId = line.substring(jobIdStarts);
-                int jobIdEnds = jobId.indexOf(" ");
-                if (jobIdEnds > -1) {
-                    jobId = jobId.substring(0, jobId.indexOf(" "));
-                }
-                if (sb.length() > 0) {
-                    sb.append(separator);
-                }
-                sb.append(jobId);
-            }
-            line = br.readLine();
-        }
-        br.close();
-        return sb.toString();
-    }
-
-
     /**
      * Get Hadoop Ids through PigStats API
      *

Modified: incubator/oozie/trunk/core/src/main/java/org/apache/oozie/action/hadoop/PigMainWithOldAPI.java
URL: http://svn.apache.org/viewvc/incubator/oozie/trunk/core/src/main/java/org/apache/oozie/action/hadoop/PigMainWithOldAPI.java?rev=1292480&r1=1292479&r2=1292480&view=diff
==============================================================================
--- incubator/oozie/trunk/core/src/main/java/org/apache/oozie/action/hadoop/PigMainWithOldAPI.java (original)
+++ incubator/oozie/trunk/core/src/main/java/org/apache/oozie/action/hadoop/PigMainWithOldAPI.java Wed Feb 22 19:52:30 2012
@@ -249,7 +249,7 @@ public class PigMainWithOldAPI extends L
         os = new FileOutputStream(file);
         jobIds.store(os, "");
         os.close();
-        System.out.println(" Hadoop Job IDs executed by Pig: " + 
jobIds.getProperty("hadoopJobs"));
+        System.out.println(" Hadoop Job IDs executed by Pig: " + 
jobIds.getProperty(HADOOP_JOBS));
         System.out.println();
     }
 
@@ -272,7 +272,7 @@ public class PigMainWithOldAPI extends L
         StringBuffer sb = new StringBuffer(100);
         if (new File(logFile).exists() == false) {
             System.err.println("pig log file: " + logFile + "  not present. 
Therefore no Hadoop jobids found");
-            props.setProperty("hadoopJobs", "");
+            props.setProperty(HADOOP_JOBS, "");
         }
         else {
             BufferedReader br = new BufferedReader(new FileReader(logFile));
@@ -292,7 +292,7 @@ public class PigMainWithOldAPI extends L
                 line = br.readLine();
             }
             br.close();
-            props.setProperty("hadoopJobs", sb.toString());
+            props.setProperty(HADOOP_JOBS, sb.toString());
         }
         return props;
     }

Modified: incubator/oozie/trunk/core/src/main/java/org/apache/oozie/action/hadoop/SqoopMain.java
URL: http://svn.apache.org/viewvc/incubator/oozie/trunk/core/src/main/java/org/apache/oozie/action/hadoop/SqoopMain.java?rev=1292480&r1=1292479&r2=1292480&view=diff
==============================================================================
--- incubator/oozie/trunk/core/src/main/java/org/apache/oozie/action/hadoop/SqoopMain.java (original)
+++ incubator/oozie/trunk/core/src/main/java/org/apache/oozie/action/hadoop/SqoopMain.java Wed Feb 22 19:52:30 2012
@@ -24,6 +24,7 @@ import java.io.OutputStream;
 import java.net.URL;
 import java.util.Map;
 import java.util.Properties;
+import java.util.regex.Pattern;
 
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.Path;
@@ -36,7 +37,10 @@ public class SqoopMain extends LauncherM
 
     public static final String SQOOP_SITE_CONF = "sqoop-site.xml";
 
-    private static final String JOB_ID_LOG_PREFIX = "Job complete: ";
+    private static final Pattern[] SQOOP_JOB_IDS_PATTERNS = {
+      Pattern.compile("Job complete: (job_\\S*)"), Pattern.compile("Job 
(job_\\S*) completed successfully")
+    };
+
     private static final String SQOOP_LOG4J_PROPS = "sqoop-log4j.properties";
 
     public static void main(String[] args) throws Exception {
@@ -126,6 +130,7 @@ public class SqoopMain extends LauncherM
         hadoopProps.setProperty("log4j.appender.jobid.layout", 
"org.apache.log4j.PatternLayout");
         
hadoopProps.setProperty("log4j.appender.jobid.layout.ConversionPattern", "%-4r 
[%t] %-5p %c %x - %m%n");
         hadoopProps.setProperty("log4j.logger.org.apache.hadoop.mapred", 
"INFO, jobid");
+        
hadoopProps.setProperty("log4j.logger.org.apache.hadoop.mapreduce.Job", "INFO, 
jobid");
 
         String localProps = new File(SQOOP_LOG4J_PROPS).getAbsolutePath();
         OutputStream os1 = new FileOutputStream(localProps);
@@ -181,7 +186,8 @@ public class SqoopMain extends LauncherM
         System.out.println();
 
         // harvesting and recording Hadoop Job IDs
-        Properties jobIds = HiveMain.getHadoopJobIds(logFile, 
JOB_ID_LOG_PREFIX);
+        Properties jobIds = getHadoopJobIds(logFile, SQOOP_JOB_IDS_PATTERNS);
+
         File file = new 
File(System.getProperty("oozie.action.output.properties"));
         OutputStream os = new FileOutputStream(file);
         try {
@@ -190,7 +196,7 @@ public class SqoopMain extends LauncherM
         finally {
             os.close();
         }
-        System.out.println(" Hadoop Job IDs executed by Sqoop: " + 
jobIds.getProperty("hadoopJobs"));
+        System.out.println(" Hadoop Job IDs executed by Sqoop: " + 
jobIds.getProperty(HADOOP_JOBS));
         System.out.println();
     }
 

Modified: incubator/oozie/trunk/core/src/test/java/org/apache/oozie/action/hadoop/TestHiveActionExecutor.java
URL: http://svn.apache.org/viewvc/incubator/oozie/trunk/core/src/test/java/org/apache/oozie/action/hadoop/TestHiveActionExecutor.java?rev=1292480&r1=1292479&r2=1292480&view=diff
==============================================================================
--- incubator/oozie/trunk/core/src/test/java/org/apache/oozie/action/hadoop/TestHiveActionExecutor.java (original)
+++ incubator/oozie/trunk/core/src/test/java/org/apache/oozie/action/hadoop/TestHiveActionExecutor.java Wed Feb 22 19:52:30 2012
@@ -198,9 +198,9 @@ public class TestHiveActionExecutor exte
         assertNotNull(context.getAction().getData());
         Properties outputData = new Properties();
         outputData.load(new StringReader(context.getAction().getData()));
-        assertTrue(outputData.containsKey("hadoopJobs"));
+        assertTrue(outputData.containsKey(LauncherMain.HADOOP_JOBS));
         //while this works in a real cluster, it does not with miniMR
-        //assertTrue(outputData.getProperty("hadoopJobs").trim().length() > 0);
+        
//assertTrue(outputData.getProperty(LauncherMain.HADOOP_JOBS).trim().length() > 
0);
 
         assertTrue(fs.exists(outputDir));
         assertTrue(fs.isDirectory(outputDir));

Modified: incubator/oozie/trunk/core/src/test/java/org/apache/oozie/action/hadoop/TestHiveMain.java
URL: http://svn.apache.org/viewvc/incubator/oozie/trunk/core/src/test/java/org/apache/oozie/action/hadoop/TestHiveMain.java?rev=1292480&r1=1292479&r2=1292480&view=diff
==============================================================================
--- incubator/oozie/trunk/core/src/test/java/org/apache/oozie/action/hadoop/TestHiveMain.java (original)
+++ incubator/oozie/trunk/core/src/test/java/org/apache/oozie/action/hadoop/TestHiveMain.java Wed Feb 22 19:52:30 2012
@@ -214,8 +214,8 @@ public class TestHiveMain extends MainTe
 //TODO: I cannot figure out why when log file is not created in this testcase, 
it works when running in Launcher
 //            Properties props = new Properties();
 //            props.load(new FileReader(outputDataFile));
-//            assertTrue(props.containsKey("hadoopJobs"));
-//            assertTrue(props.getProperty("hadoopJobs").trim().length() > 0);
+//            assertTrue(props.containsKey(LauncherMain.HADOOP_JOBS));
+//            
assertTrue(props.getProperty(LauncherMain.HADOOP_JOBS).trim().length() > 0);
         }
         return null;
     }

Modified: incubator/oozie/trunk/core/src/test/java/org/apache/oozie/action/hadoop/TestSqoopActionExecutor.java
URL: http://svn.apache.org/viewvc/incubator/oozie/trunk/core/src/test/java/org/apache/oozie/action/hadoop/TestSqoopActionExecutor.java?rev=1292480&r1=1292479&r2=1292480&view=diff
==============================================================================
--- incubator/oozie/trunk/core/src/test/java/org/apache/oozie/action/hadoop/TestSqoopActionExecutor.java (original)
+++ incubator/oozie/trunk/core/src/test/java/org/apache/oozie/action/hadoop/TestSqoopActionExecutor.java Wed Feb 22 19:52:30 2012
@@ -195,8 +195,8 @@ public class TestSqoopActionExecutor ext
         assertNotNull(context.getAction().getData());
         Properties outputData = new Properties();
         outputData.load(new StringReader(context.getAction().getData()));
-        assertTrue(outputData.containsKey("hadoopJobs"));
-        assertTrue(outputData.getProperty("hadoopJobs").trim().length() > 0);
+        assertTrue(outputData.containsKey(LauncherMain.HADOOP_JOBS));
+        
assertTrue(outputData.getProperty(LauncherMain.HADOOP_JOBS).trim().length() > 
0);
     }
 
     public void testSqoopActionFreeFormQuery() throws Exception {
@@ -244,8 +244,8 @@ public class TestSqoopActionExecutor ext
         assertNotNull(context.getAction().getData());
         Properties outputData = new Properties();
         outputData.load(new StringReader(context.getAction().getData()));
-        assertTrue(outputData.containsKey("hadoopJobs"));
-        assertTrue(outputData.getProperty("hadoopJobs").trim().length() > 0);
+        assertTrue(outputData.containsKey(LauncherMain.HADOOP_JOBS));
+        
assertTrue(outputData.getProperty(LauncherMain.HADOOP_JOBS).trim().length() > 
0);
     }
 
 

Modified: incubator/oozie/trunk/release-log.txt
URL: http://svn.apache.org/viewvc/incubator/oozie/trunk/release-log.txt?rev=1292480&r1=1292479&r2=1292480&view=diff
==============================================================================
--- incubator/oozie/trunk/release-log.txt (original)
+++ incubator/oozie/trunk/release-log.txt Wed Feb 22 19:52:30 2012
@@ -1,5 +1,6 @@
 -- Oozie 3.2.0 release
 
+OOZIE-703 Improve/Consolidate Hadoop job ID log harvesting logic (tucu)
 OOZIE-702 XTestCase minicluster fails with Hadoop 1.x (tucu)
 OOZIE-700 update hadooplibs versions (tucu)
 OOZIE-698 Make sharelib components version dependency configurable (tucu)


Reply via email to