Author: tucu
Date: Wed Feb 22 19:52:30 2012
New Revision: 1292480
URL: http://svn.apache.org/viewvc?rev=1292480&view=rev
Log:
OOZIE-703 Improve/Consolidate Hadoop job ID log harvesting logic (tucu)
Modified:
incubator/oozie/trunk/core/src/main/java/org/apache/oozie/action/hadoop/HiveMain.java
incubator/oozie/trunk/core/src/main/java/org/apache/oozie/action/hadoop/LauncherMain.java
incubator/oozie/trunk/core/src/main/java/org/apache/oozie/action/hadoop/PigMain.java
incubator/oozie/trunk/core/src/main/java/org/apache/oozie/action/hadoop/PigMainWithOldAPI.java
incubator/oozie/trunk/core/src/main/java/org/apache/oozie/action/hadoop/SqoopMain.java
incubator/oozie/trunk/core/src/test/java/org/apache/oozie/action/hadoop/TestHiveActionExecutor.java
incubator/oozie/trunk/core/src/test/java/org/apache/oozie/action/hadoop/TestHiveMain.java
incubator/oozie/trunk/core/src/test/java/org/apache/oozie/action/hadoop/TestSqoopActionExecutor.java
incubator/oozie/trunk/release-log.txt
Modified:
incubator/oozie/trunk/core/src/main/java/org/apache/oozie/action/hadoop/HiveMain.java
URL:
http://svn.apache.org/viewvc/incubator/oozie/trunk/core/src/main/java/org/apache/oozie/action/hadoop/HiveMain.java?rev=1292480&r1=1292479&r2=1292480&view=diff
==============================================================================
---
incubator/oozie/trunk/core/src/main/java/org/apache/oozie/action/hadoop/HiveMain.java
(original)
+++
incubator/oozie/trunk/core/src/main/java/org/apache/oozie/action/hadoop/HiveMain.java
Wed Feb 22 19:52:30 2012
@@ -32,6 +32,7 @@ import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Properties;
+import java.util.regex.Pattern;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
@@ -40,6 +41,10 @@ import org.apache.hadoop.hive.cli.CliDri
public class HiveMain extends LauncherMain {
public static final String USER_HIVE_DEFAULT_FILE =
"oozie-user-hive-default.xml";
+ private static final Pattern[] HIVE_JOB_IDS_PATTERNS = {
+ Pattern.compile("Ended Job = (job_\\S*)")
+ };
+
public static final String HIVE_L4J_PROPS = "hive-log4j.properties";
public static final String HIVE_EXEC_L4J_PROPS =
"hive-exec-log4j.properties";
public static final String HIVE_SITE_CONF = "hive-site.xml";
@@ -288,12 +293,12 @@ public class HiveMain extends LauncherMa
System.out.println("\n<<< Invocation of Hive command completed <<<\n");
// harvesting and recording Hadoop Job IDs
- Properties jobIds = getHadoopJobIds(logFile, JOB_ID_LOG_PREFIX);
+ Properties jobIds = getHadoopJobIds(logFile, HIVE_JOB_IDS_PATTERNS);
File file = new
File(System.getProperty("oozie.action.output.properties"));
OutputStream os = new FileOutputStream(file);
jobIds.store(os, "");
os.close();
-        System.out.println(" Hadoop Job IDs executed by Hive: " + jobIds.getProperty("hadoopJobs"));
+        System.out.println(" Hadoop Job IDs executed by Hive: " + jobIds.getProperty(HADOOP_JOBS));
System.out.println();
}
@@ -347,37 +352,4 @@ public class HiveMain extends LauncherMa
return expr;
}
- //TODO: Hive should provide a programmatic way of spitting out Hadoop jobs
- private static final String JOB_ID_LOG_PREFIX = "Ended Job = ";
-
- public static Properties getHadoopJobIds(String logFile, String prefix) throws IOException {
- Properties props = new Properties();
- StringBuffer sb = new StringBuffer(100);
- if (!new File(logFile).exists()) {
- System.err.println("hive log file: " + logFile + " not present. Therefore no Hadoop jobids found");
- props.setProperty("hadoopJobs", "");
- }
- else {
- BufferedReader br = new BufferedReader(new FileReader(logFile));
- String line = br.readLine();
- String separator = "";
- while (line != null) {
- if (line.contains(prefix)) {
- int jobIdStarts = line.indexOf(prefix) + prefix.length();
- String jobId = line.substring(jobIdStarts).trim();
-
- //Doing this because Hive now does things like ConditionalTask which are not Hadoop jobs.
- if (jobId.startsWith("job_")) {
- sb.append(separator).append(jobId);
- separator = ",";
- }
- }
- line = br.readLine();
- }
- br.close();
- props.setProperty("hadoopJobs", sb.toString());
- }
- return props;
- }
-
}
Modified:
incubator/oozie/trunk/core/src/main/java/org/apache/oozie/action/hadoop/LauncherMain.java
URL:
http://svn.apache.org/viewvc/incubator/oozie/trunk/core/src/main/java/org/apache/oozie/action/hadoop/LauncherMain.java?rev=1292480&r1=1292479&r2=1292480&view=diff
==============================================================================
---
incubator/oozie/trunk/core/src/main/java/org/apache/oozie/action/hadoop/LauncherMain.java
(original)
+++
incubator/oozie/trunk/core/src/main/java/org/apache/oozie/action/hadoop/LauncherMain.java
Wed Feb 22 19:52:30 2012
@@ -17,18 +17,54 @@
*/
package org.apache.oozie.action.hadoop;
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileReader;
import java.io.IOException;
import java.io.StringWriter;
import java.util.Collection;
import java.util.Map;
+import java.util.Properties;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
public abstract class LauncherMain {
+ public static final String HADOOP_JOBS = "hadoopJobs";
+
protected static void run(Class<? extends LauncherMain> klass, String[] args) throws Exception {
LauncherMain main = klass.newInstance();
main.run(args);
}
+ public static Properties getHadoopJobIds(String logFile, Pattern[] patterns) throws IOException {
+ Properties props = new Properties();
+ StringBuffer sb = new StringBuffer(100);
+ if (!new File(logFile).exists()) {
+ System.err.println("Log file: " + logFile + " not present. Therefore no Hadoop jobids found");
+ props.setProperty(HADOOP_JOBS, "");
+ }
+ else {
+ BufferedReader br = new BufferedReader(new FileReader(logFile));
+ String line = br.readLine();
+ String separator = "";
+ while (line != null) {
+ for (Pattern pattern : patterns) {
+ Matcher matcher = pattern.matcher(line);
+ if (matcher.find()) {
+ String jobId = matcher.group(1);
+ sb.append(separator).append(jobId);
+ separator = ",";
+ }
+ }
+ line = br.readLine();
+ }
+ br.close();
+ props.setProperty(HADOOP_JOBS, sb.toString());
+ }
+ return props;
+ }
+
protected abstract void run(String[] args) throws Exception;
/**
Modified:
incubator/oozie/trunk/core/src/main/java/org/apache/oozie/action/hadoop/PigMain.java
URL:
http://svn.apache.org/viewvc/incubator/oozie/trunk/core/src/main/java/org/apache/oozie/action/hadoop/PigMain.java?rev=1292480&r1=1292479&r2=1292480&view=diff
==============================================================================
---
incubator/oozie/trunk/core/src/main/java/org/apache/oozie/action/hadoop/PigMain.java
(original)
+++
incubator/oozie/trunk/core/src/main/java/org/apache/oozie/action/hadoop/PigMain.java
Wed Feb 22 19:52:30 2012
@@ -41,6 +41,7 @@ import java.util.ArrayList;
import java.util.Properties;
import java.util.Set;
import java.net.URL;
+import java.util.regex.Pattern;
public class PigMain extends LauncherMain {
private static final Set<String> DISALLOWED_PIG_OPTIONS = new
HashSet<String>();
@@ -50,6 +51,10 @@ public class PigMain extends LauncherMai
public static final String EXTERNAL_STATS_WRITE = ACTION_PREFIX +
"external.stats.write";
public static final int STRING_BUFFER_SIZE = 100;
+ private static final Pattern[] PIG_JOB_IDS_PATTERNS = {
+ Pattern.compile("HadoopJobId: (job_\\S*)")
+ };
+
static {
DISALLOWED_PIG_OPTIONS.add("-4");
DISALLOWED_PIG_OPTIONS.add("-log4jconf");
@@ -224,9 +229,9 @@ public class PigMain extends LauncherMai
// So retrieving hadoop Ids here
File file = new File(System.getProperty(EXTERNAL_CHILD_IDS));
if (!file.exists()) {
- String jobIds = getHadoopJobIds(logFile);
- writeExternalData(jobIds, file);
- System.out.println(" Hadoop Job IDs executed by Pig: " + jobIds);
+ Properties props = getHadoopJobIds(logFile, PIG_JOB_IDS_PATTERNS);
+ writeExternalData(props.getProperty(HADOOP_JOBS), file);
+ System.out.println(" Hadoop Job IDs executed by Pig: " + props.getProperty(HADOOP_JOBS));
System.out.println();
}
}
@@ -355,43 +360,6 @@ public class PigMain extends LauncherMai
MapReduceMain.setStrings(conf, "oozie.pig.args", args);
}
- private static final String JOB_ID_LOG_PREFIX = "HadoopJobId: ";
-
- /**
- * Get Hadoop Ids by parsing the log file
- *
- * @param logFile the pig log file
- * @return comma-separated String
- */
- protected String getHadoopJobIds(String logFile) throws IOException {
- StringBuilder sb = new StringBuilder(STRING_BUFFER_SIZE);
- if (new File(logFile).exists() == false) {
- System.err.println("pig log file: " + logFile + " not present.
Therefore no Hadoop jobids found");
- return sb.toString();
- }
- BufferedReader br = new BufferedReader(new FileReader(logFile));
- String line = br.readLine();
- String separator = ",";
- while (line != null) {
- if (line.contains(JOB_ID_LOG_PREFIX)) {
- int jobIdStarts = line.indexOf(JOB_ID_LOG_PREFIX) +
JOB_ID_LOG_PREFIX.length();
- String jobId = line.substring(jobIdStarts);
- int jobIdEnds = jobId.indexOf(" ");
- if (jobIdEnds > -1) {
- jobId = jobId.substring(0, jobId.indexOf(" "));
- }
- if (sb.length() > 0) {
- sb.append(separator);
- }
- sb.append(jobId);
- }
- line = br.readLine();
- }
- br.close();
- return sb.toString();
- }
-
-
/**
* Get Hadoop Ids through PigStats API
*
Modified:
incubator/oozie/trunk/core/src/main/java/org/apache/oozie/action/hadoop/PigMainWithOldAPI.java
URL:
http://svn.apache.org/viewvc/incubator/oozie/trunk/core/src/main/java/org/apache/oozie/action/hadoop/PigMainWithOldAPI.java?rev=1292480&r1=1292479&r2=1292480&view=diff
==============================================================================
---
incubator/oozie/trunk/core/src/main/java/org/apache/oozie/action/hadoop/PigMainWithOldAPI.java
(original)
+++
incubator/oozie/trunk/core/src/main/java/org/apache/oozie/action/hadoop/PigMainWithOldAPI.java
Wed Feb 22 19:52:30 2012
@@ -249,7 +249,7 @@ public class PigMainWithOldAPI extends L
os = new FileOutputStream(file);
jobIds.store(os, "");
os.close();
- System.out.println(" Hadoop Job IDs executed by Pig: " +
jobIds.getProperty("hadoopJobs"));
+ System.out.println(" Hadoop Job IDs executed by Pig: " +
jobIds.getProperty(HADOOP_JOBS));
System.out.println();
}
@@ -272,7 +272,7 @@ public class PigMainWithOldAPI extends L
StringBuffer sb = new StringBuffer(100);
if (new File(logFile).exists() == false) {
System.err.println("pig log file: " + logFile + " not present.
Therefore no Hadoop jobids found");
- props.setProperty("hadoopJobs", "");
+ props.setProperty(HADOOP_JOBS, "");
}
else {
BufferedReader br = new BufferedReader(new FileReader(logFile));
@@ -292,7 +292,7 @@ public class PigMainWithOldAPI extends L
line = br.readLine();
}
br.close();
- props.setProperty("hadoopJobs", sb.toString());
+ props.setProperty(HADOOP_JOBS, sb.toString());
}
return props;
}
Modified:
incubator/oozie/trunk/core/src/main/java/org/apache/oozie/action/hadoop/SqoopMain.java
URL:
http://svn.apache.org/viewvc/incubator/oozie/trunk/core/src/main/java/org/apache/oozie/action/hadoop/SqoopMain.java?rev=1292480&r1=1292479&r2=1292480&view=diff
==============================================================================
---
incubator/oozie/trunk/core/src/main/java/org/apache/oozie/action/hadoop/SqoopMain.java
(original)
+++
incubator/oozie/trunk/core/src/main/java/org/apache/oozie/action/hadoop/SqoopMain.java
Wed Feb 22 19:52:30 2012
@@ -24,6 +24,7 @@ import java.io.OutputStream;
import java.net.URL;
import java.util.Map;
import java.util.Properties;
+import java.util.regex.Pattern;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
@@ -36,7 +37,10 @@ public class SqoopMain extends LauncherM
public static final String SQOOP_SITE_CONF = "sqoop-site.xml";
- private static final String JOB_ID_LOG_PREFIX = "Job complete: ";
+ private static final Pattern[] SQOOP_JOB_IDS_PATTERNS = {
+     Pattern.compile("Job complete: (job_\\S*)"), Pattern.compile("Job (job_\\S*) completed successfully")
+ };
+
private static final String SQOOP_LOG4J_PROPS = "sqoop-log4j.properties";
public static void main(String[] args) throws Exception {
@@ -126,6 +130,7 @@ public class SqoopMain extends LauncherM
hadoopProps.setProperty("log4j.appender.jobid.layout",
"org.apache.log4j.PatternLayout");
hadoopProps.setProperty("log4j.appender.jobid.layout.ConversionPattern", "%-4r
[%t] %-5p %c %x - %m%n");
hadoopProps.setProperty("log4j.logger.org.apache.hadoop.mapred",
"INFO, jobid");
+
hadoopProps.setProperty("log4j.logger.org.apache.hadoop.mapreduce.Job", "INFO,
jobid");
String localProps = new File(SQOOP_LOG4J_PROPS).getAbsolutePath();
OutputStream os1 = new FileOutputStream(localProps);
@@ -181,7 +186,8 @@ public class SqoopMain extends LauncherM
System.out.println();
// harvesting and recording Hadoop Job IDs
- Properties jobIds = HiveMain.getHadoopJobIds(logFile,
JOB_ID_LOG_PREFIX);
+ Properties jobIds = getHadoopJobIds(logFile, SQOOP_JOB_IDS_PATTERNS);
+
File file = new
File(System.getProperty("oozie.action.output.properties"));
OutputStream os = new FileOutputStream(file);
try {
@@ -190,7 +196,7 @@ public class SqoopMain extends LauncherM
finally {
os.close();
}
- System.out.println(" Hadoop Job IDs executed by Sqoop: " +
jobIds.getProperty("hadoopJobs"));
+ System.out.println(" Hadoop Job IDs executed by Sqoop: " +
jobIds.getProperty(HADOOP_JOBS));
System.out.println();
}
Modified:
incubator/oozie/trunk/core/src/test/java/org/apache/oozie/action/hadoop/TestHiveActionExecutor.java
URL:
http://svn.apache.org/viewvc/incubator/oozie/trunk/core/src/test/java/org/apache/oozie/action/hadoop/TestHiveActionExecutor.java?rev=1292480&r1=1292479&r2=1292480&view=diff
==============================================================================
---
incubator/oozie/trunk/core/src/test/java/org/apache/oozie/action/hadoop/TestHiveActionExecutor.java
(original)
+++
incubator/oozie/trunk/core/src/test/java/org/apache/oozie/action/hadoop/TestHiveActionExecutor.java
Wed Feb 22 19:52:30 2012
@@ -198,9 +198,9 @@ public class TestHiveActionExecutor exte
assertNotNull(context.getAction().getData());
Properties outputData = new Properties();
outputData.load(new StringReader(context.getAction().getData()));
- assertTrue(outputData.containsKey("hadoopJobs"));
+ assertTrue(outputData.containsKey(LauncherMain.HADOOP_JOBS));
//while this works in a real cluster, it does not with miniMR
- //assertTrue(outputData.getProperty("hadoopJobs").trim().length() > 0);
+
//assertTrue(outputData.getProperty(LauncherMain.HADOOP_JOBS).trim().length() >
0);
assertTrue(fs.exists(outputDir));
assertTrue(fs.isDirectory(outputDir));
Modified:
incubator/oozie/trunk/core/src/test/java/org/apache/oozie/action/hadoop/TestHiveMain.java
URL:
http://svn.apache.org/viewvc/incubator/oozie/trunk/core/src/test/java/org/apache/oozie/action/hadoop/TestHiveMain.java?rev=1292480&r1=1292479&r2=1292480&view=diff
==============================================================================
---
incubator/oozie/trunk/core/src/test/java/org/apache/oozie/action/hadoop/TestHiveMain.java
(original)
+++
incubator/oozie/trunk/core/src/test/java/org/apache/oozie/action/hadoop/TestHiveMain.java
Wed Feb 22 19:52:30 2012
@@ -214,8 +214,8 @@ public class TestHiveMain extends MainTe
//TODO: I cannot figure out why when log file is not created in this testcase,
it works when running in Launcher
// Properties props = new Properties();
// props.load(new FileReader(outputDataFile));
-// assertTrue(props.containsKey("hadoopJobs"));
-// assertTrue(props.getProperty("hadoopJobs").trim().length() > 0);
+// assertTrue(props.containsKey(LauncherMain.HADOOP_JOBS));
+//
assertTrue(props.getProperty(LauncherMain.HADOOP_JOBS).trim().length() > 0);
}
return null;
}
Modified:
incubator/oozie/trunk/core/src/test/java/org/apache/oozie/action/hadoop/TestSqoopActionExecutor.java
URL:
http://svn.apache.org/viewvc/incubator/oozie/trunk/core/src/test/java/org/apache/oozie/action/hadoop/TestSqoopActionExecutor.java?rev=1292480&r1=1292479&r2=1292480&view=diff
==============================================================================
---
incubator/oozie/trunk/core/src/test/java/org/apache/oozie/action/hadoop/TestSqoopActionExecutor.java
(original)
+++
incubator/oozie/trunk/core/src/test/java/org/apache/oozie/action/hadoop/TestSqoopActionExecutor.java
Wed Feb 22 19:52:30 2012
@@ -195,8 +195,8 @@ public class TestSqoopActionExecutor ext
assertNotNull(context.getAction().getData());
Properties outputData = new Properties();
outputData.load(new StringReader(context.getAction().getData()));
- assertTrue(outputData.containsKey("hadoopJobs"));
- assertTrue(outputData.getProperty("hadoopJobs").trim().length() > 0);
+ assertTrue(outputData.containsKey(LauncherMain.HADOOP_JOBS));
+
assertTrue(outputData.getProperty(LauncherMain.HADOOP_JOBS).trim().length() >
0);
}
public void testSqoopActionFreeFormQuery() throws Exception {
@@ -244,8 +244,8 @@ public class TestSqoopActionExecutor ext
assertNotNull(context.getAction().getData());
Properties outputData = new Properties();
outputData.load(new StringReader(context.getAction().getData()));
- assertTrue(outputData.containsKey("hadoopJobs"));
- assertTrue(outputData.getProperty("hadoopJobs").trim().length() > 0);
+ assertTrue(outputData.containsKey(LauncherMain.HADOOP_JOBS));
+
assertTrue(outputData.getProperty(LauncherMain.HADOOP_JOBS).trim().length() >
0);
}
Modified: incubator/oozie/trunk/release-log.txt
URL:
http://svn.apache.org/viewvc/incubator/oozie/trunk/release-log.txt?rev=1292480&r1=1292479&r2=1292480&view=diff
==============================================================================
--- incubator/oozie/trunk/release-log.txt (original)
+++ incubator/oozie/trunk/release-log.txt Wed Feb 22 19:52:30 2012
@@ -1,5 +1,6 @@
-- Oozie 3.2.0 release
+OOZIE-703 Improve/Consolidate Hadoop job ID log harvesting logic (tucu)
OOZIE-702 XTestCase minicluster fails with Hadoop 1.x (tucu)
OOZIE-700 update hadooplibs versions (tucu)
OOZIE-698 Make sharelib components version dependency configurable (tucu)